diff options
Diffstat (limited to '')
72 files changed, 1863 insertions, 640 deletions
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/GamesFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/GamesFragment.kt index fc0eeb9ad..54380323e 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/GamesFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/GamesFragment.kt @@ -91,18 +91,20 @@ class GamesFragment : Fragment() { viewLifecycleOwner.lifecycleScope.apply { launch { repeatOnLifecycle(Lifecycle.State.RESUMED) { - gamesViewModel.isReloading.collect { binding.swipeRefresh.isRefreshing = it } + gamesViewModel.isReloading.collect { + binding.swipeRefresh.isRefreshing = it + if (gamesViewModel.games.value.isEmpty() && !it) { + binding.noticeText.visibility = View.VISIBLE + } else { + binding.noticeText.visibility = View.INVISIBLE + } + } } } launch { repeatOnLifecycle(Lifecycle.State.RESUMED) { gamesViewModel.games.collectLatest { (binding.gridGames.adapter as GameAdapter).submitList(it) - if (it.isEmpty()) { - binding.noticeText.visibility = View.VISIBLE - } else { - binding.noticeText.visibility = View.GONE - } } } } diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index 698c9c8ad..5dc7e5d59 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -9,7 +9,7 @@ namespace Core { -void ArmInterface::LogBacktrace(const Kernel::KProcess* process) const { +void ArmInterface::LogBacktrace(Kernel::KProcess* process) const { Kernel::Svc::ThreadContext ctx; this->GetContext(ctx); diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 806c7c9e9..495963eef 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -95,7 +95,7 @@ public: virtual void SignalInterrupt(Kernel::KThread* thread) = 0; // Stack trace generation. - void LogBacktrace(const Kernel::KProcess* process) const; + void LogBacktrace(Kernel::KProcess* process) const; // Debug functionality. virtual const Kernel::DebugWatchpoint* HaltedWatchpoint() const = 0; diff --git a/src/core/arm/debug.cpp b/src/core/arm/debug.cpp index af1c34bc3..854509463 100644 --- a/src/core/arm/debug.cpp +++ b/src/core/arm/debug.cpp @@ -79,7 +79,7 @@ constexpr std::array<u64, 2> SegmentBases{ 0x7100000000ULL, }; -void SymbolicateBacktrace(const Kernel::KProcess* process, std::vector<BacktraceEntry>& out) { +void SymbolicateBacktrace(Kernel::KProcess* process, std::vector<BacktraceEntry>& out) { auto modules = FindModules(process); const bool is_64 = process->Is64Bit(); @@ -118,7 +118,7 @@ void SymbolicateBacktrace(const Kernel::KProcess* process, std::vector<Backtrace } } -std::vector<BacktraceEntry> GetAArch64Backtrace(const Kernel::KProcess* process, +std::vector<BacktraceEntry> GetAArch64Backtrace(Kernel::KProcess* process, const Kernel::Svc::ThreadContext& ctx) { std::vector<BacktraceEntry> out; auto& memory = process->GetMemory(); @@ -144,7 +144,7 @@ std::vector<BacktraceEntry> GetAArch64Backtrace(const Kernel::KProcess* process, return out; } -std::vector<BacktraceEntry> GetAArch32Backtrace(const Kernel::KProcess* process, +std::vector<BacktraceEntry> GetAArch32Backtrace(Kernel::KProcess* process, const Kernel::Svc::ThreadContext& ctx) { std::vector<BacktraceEntry> out; auto& memory = process->GetMemory(); @@ -173,7 +173,7 @@ std::vector<BacktraceEntry> GetAArch32Backtrace(const Kernel::KProcess* process, } // namespace std::optional<std::string> GetThreadName(const Kernel::KThread* thread) { - const auto* process = thread->GetOwnerProcess(); + auto* process = thread->GetOwnerProcess(); if (process->Is64Bit()) { return GetNameFromThreadType64(process->GetMemory(), *thread); } else { @@ -248,7 +248,7 @@ Kernel::KProcessAddress GetModuleEnd(const Kernel::KProcess* process, return cur_addr - 1; } -Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process) { +Loader::AppLoader::Modules FindModules(Kernel::KProcess* process) { Loader::AppLoader::Modules modules; auto& page_table = process->GetPageTable(); @@ -312,7 +312,7 @@ Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process) { return modules; } -Kernel::KProcessAddress FindMainModuleEntrypoint(const Kernel::KProcess* process) { +Kernel::KProcessAddress FindMainModuleEntrypoint(Kernel::KProcess* process) { // Do we have any loaded executable sections? auto modules = FindModules(process); @@ -337,7 +337,7 @@ void InvalidateInstructionCacheRange(const Kernel::KProcess* process, u64 addres } } -std::vector<BacktraceEntry> GetBacktraceFromContext(const Kernel::KProcess* process, +std::vector<BacktraceEntry> GetBacktraceFromContext(Kernel::KProcess* process, const Kernel::Svc::ThreadContext& ctx) { if (process->Is64Bit()) { return GetAArch64Backtrace(process, ctx); diff --git a/src/core/arm/debug.h b/src/core/arm/debug.h index c542633db..3cd671365 100644 --- a/src/core/arm/debug.h +++ b/src/core/arm/debug.h @@ -14,9 +14,9 @@ std::optional<std::string> GetThreadName(const Kernel::KThread* thread); std::string_view GetThreadWaitReason(const Kernel::KThread* thread); std::string GetThreadState(const Kernel::KThread* thread); -Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process); +Loader::AppLoader::Modules FindModules(Kernel::KProcess* process); Kernel::KProcessAddress GetModuleEnd(const Kernel::KProcess* process, Kernel::KProcessAddress base); -Kernel::KProcessAddress FindMainModuleEntrypoint(const Kernel::KProcess* process); +Kernel::KProcessAddress FindMainModuleEntrypoint(Kernel::KProcess* process); void InvalidateInstructionCacheRange(const Kernel::KProcess* process, u64 address, u64 size); @@ -28,7 +28,7 @@ struct BacktraceEntry { std::string name; }; -std::vector<BacktraceEntry> GetBacktraceFromContext(const Kernel::KProcess* process, +std::vector<BacktraceEntry> GetBacktraceFromContext(Kernel::KProcess* process, const Kernel::Svc::ThreadContext& ctx); std::vector<BacktraceEntry> GetBacktrace(const Kernel::KThread* thread); diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index f34865e26..c78cfd528 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -15,7 +15,7 @@ using namespace Common::Literals; class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { public: - explicit DynarmicCallbacks32(ArmDynarmic32& parent, const Kernel::KProcess* process) + explicit DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process) : m_parent{parent}, m_memory(process->GetMemory()), m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()}, m_check_memory_access{m_debugger_enabled || @@ -169,7 +169,7 @@ public: ArmDynarmic32& m_parent; Core::Memory::Memory& m_memory; - const Kernel::KProcess* m_process{}; + Kernel::KProcess* m_process{}; const bool m_debugger_enabled{}; const bool m_check_memory_access{}; static constexpr u64 MinimumRunCycles = 10000U; @@ -370,7 +370,7 @@ void ArmDynarmic32::RewindBreakpointInstruction() { this->SetContext(m_breakpoint_context); } -ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, const Kernel::KProcess* process, +ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index) : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor}, m_cb(std::make_unique<DynarmicCallbacks32>(*this, process)), diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index 185ac7cbf..b580efe61 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -20,7 +20,7 @@ class System; class ArmDynarmic32 final : public ArmInterface { public: - ArmDynarmic32(System& system, bool uses_wall_clock, const Kernel::KProcess* process, + ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ArmDynarmic32() override; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index dff14756e..f351b13d9 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -15,7 +15,7 @@ using namespace Common::Literals; class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { public: - explicit DynarmicCallbacks64(ArmDynarmic64& parent, const Kernel::KProcess* process) + explicit DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process) : m_parent{parent}, m_memory(process->GetMemory()), m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()}, m_check_memory_access{m_debugger_enabled || @@ -216,7 +216,7 @@ public: Core::Memory::Memory& m_memory; u64 m_tpidrro_el0{}; u64 m_tpidr_el0{}; - const Kernel::KProcess* m_process{}; + Kernel::KProcess* m_process{}; const bool m_debugger_enabled{}; const bool m_check_memory_access{}; static constexpr u64 MinimumRunCycles = 10000U; @@ -399,7 +399,7 @@ void ArmDynarmic64::RewindBreakpointInstruction() { this->SetContext(m_breakpoint_context); } -ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, const Kernel::KProcess* process, +ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index) : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor}, m_cb(std::make_unique<DynarmicCallbacks64>(*this, process)), m_core_index{core_index} { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 4f3dd026f..08cd982b3 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -25,7 +25,7 @@ class System; class ArmDynarmic64 final : public ArmInterface { public: - ArmDynarmic64(System& system, bool uses_wall_clock, const Kernel::KProcess* process, + ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ArmDynarmic64() override; diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 1311e66a9..123b3da7e 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -39,7 +39,7 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { } using namespace Common::Literals; -constexpr u32 StackSize = 32_KiB; +constexpr u32 StackSize = 128_KiB; } // namespace diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp index 8e81c66a5..def888d15 100644 --- a/src/core/arm/nce/interpreter_visitor.cpp +++ b/src/core/arm/nce/interpreter_visitor.cpp @@ -5,8 +5,6 @@ #include "common/bit_cast.h" #include "core/arm/nce/interpreter_visitor.h" -#include <dynarmic/frontend/A64/decoder/a64.h> - namespace Core { template <u32 BitSize> @@ -249,6 +247,7 @@ bool InterpreterVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) { return false; } + // Size in bytes const u64 size = 4 << opc.ZeroExtend(); const u64 offset = imm19.SignExtend<u64>() << 2; const u64 address = this->GetPc() + offset; @@ -530,7 +529,7 @@ bool InterpreterVisitor::SIMDImmediate(bool wback, bool postindex, size_t scale, } case MemOp::Load: { u128 data{}; - m_memory.ReadBlock(address, &data, datasize); + m_memory.ReadBlock(address, &data, datasize / 8); this->SetVec(Vt, data); break; } diff --git a/src/core/arm/nce/visitor_base.h b/src/core/arm/nce/visitor_base.h index 8fb032912..6a2be3d9b 100644 --- a/src/core/arm/nce/visitor_base.h +++ b/src/core/arm/nce/visitor_base.h @@ -4,9 +4,15 @@ #pragma once +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + #include <dynarmic/frontend/A64/a64_types.h> +#include <dynarmic/frontend/A64/decoder/a64.h> #include <dynarmic/frontend/imm.h> +#pragma GCC diagnostic pop + namespace Core { class VisitorBase { diff --git a/src/core/core.cpp b/src/core/core.cpp index b14f74976..66f444d39 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -28,7 +28,6 @@ #include "core/file_sys/savedata_factory.h" #include "core/file_sys/vfs_concat.h" #include "core/file_sys/vfs_real.h" -#include "core/gpu_dirty_memory_manager.h" #include "core/hid/hid_core.h" #include "core/hle/kernel/k_memory_manager.h" #include "core/hle/kernel/k_process.h" @@ -130,11 +129,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, struct System::Impl { explicit Impl(System& system) - : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, - cpu_manager{system}, reporter{system}, applet_manager{system}, profile_manager{}, - time_manager{system}, gpu_dirty_memory_write_manager{} { - memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); - } + : kernel{system}, fs_controller{system}, hid_core{}, room_network{}, cpu_manager{system}, + reporter{system}, applet_manager{system}, profile_manager{}, time_manager{system} {} void Initialize(System& system) { device_memory = std::make_unique<Core::DeviceMemory>(); @@ -241,17 +237,17 @@ struct System::Impl { debugger = std::make_unique<Debugger>(system, port); } - SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) { + void InitializeKernel(System& system) { LOG_DEBUG(Core, "initialized OK"); // Setting changes may require a full system reinitialization (e.g., disabling multicore). ReinitializeIfNecessary(system); - memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); - kernel.Initialize(); cpu_manager.Initialize(); + } + SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) { /// Reset all glue registrations arp_manager.ResetAll(); @@ -300,17 +296,9 @@ struct System::Impl { return SystemResultStatus::ErrorGetLoader; } - SystemResultStatus init_result{SetupForApplicationProcess(system, emu_window)}; - if (init_result != SystemResultStatus::Success) { - LOG_CRITICAL(Core, "Failed to initialize system (Error {})!", - static_cast<int>(init_result)); - ShutdownMainProcess(); - return init_result; - } - - telemetry_session->AddInitialInfo(*app_loader, fs_controller, *content_provider); + InitializeKernel(system); - // Create the process. + // Create the application process. auto main_process = Kernel::KProcess::Create(system.Kernel()); Kernel::KProcess::Register(system.Kernel(), main_process); kernel.AppendNewProcess(main_process); @@ -323,7 +311,18 @@ struct System::Impl { return static_cast<SystemResultStatus>( static_cast<u32>(SystemResultStatus::ErrorLoader) + static_cast<u32>(load_result)); } + + // Set up the rest of the system. + SystemResultStatus init_result{SetupForApplicationProcess(system, emu_window)}; + if (init_result != SystemResultStatus::Success) { + LOG_CRITICAL(Core, "Failed to initialize system (Error {})!", + static_cast<int>(init_result)); + ShutdownMainProcess(); + return init_result; + } + AddGlueRegistrationForProcess(*app_loader, *main_process); + telemetry_session->AddInitialInfo(*app_loader, fs_controller, *content_provider); // Initialize cheat engine if (cheat_engine) { @@ -426,7 +425,6 @@ struct System::Impl { cpu_manager.Shutdown(); debugger.reset(); kernel.Shutdown(); - memory.Reset(); Network::RestartSocketOperations(); if (auto room_member = room_network.GetRoomMember().lock()) { @@ -507,7 +505,6 @@ struct System::Impl { std::unique_ptr<Tegra::Host1x::Host1x> host1x_core; std::unique_ptr<Core::DeviceMemory> device_memory; std::unique_ptr<AudioCore::AudioCore> audio_core; - Core::Memory::Memory memory; Core::HID::HIDCore hid_core; Network::RoomNetwork room_network; @@ -567,9 +564,6 @@ struct System::Impl { std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; - std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> - gpu_dirty_memory_write_manager{}; - std::deque<std::vector<u8>> user_channel; }; @@ -652,29 +646,12 @@ void System::PrepareReschedule(const u32 core_index) { impl->kernel.PrepareReschedule(core_index); } -Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() { - const std::size_t core = impl->kernel.GetCurrentHostThreadID(); - return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES - ? core - : Core::Hardware::NUM_CPU_CORES - 1]; -} - -/// Provides a constant reference to the current gou dirty memory manager. -const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const { - const std::size_t core = impl->kernel.GetCurrentHostThreadID(); - return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES - ? core - : Core::Hardware::NUM_CPU_CORES - 1]; -} - size_t System::GetCurrentHostThreadID() const { return impl->kernel.GetCurrentHostThreadID(); } void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { - for (auto& manager : impl->gpu_dirty_memory_write_manager) { - manager.Gather(callback); - } + return this->ApplicationProcess()->GatherGPUDirtyMemory(callback); } PerfStatsResults System::GetAndResetPerfStats() { @@ -723,20 +700,12 @@ const Kernel::KProcess* System::ApplicationProcess() const { return impl->kernel.ApplicationProcess(); } -ExclusiveMonitor& System::Monitor() { - return impl->kernel.GetExclusiveMonitor(); -} - -const ExclusiveMonitor& System::Monitor() const { - return impl->kernel.GetExclusiveMonitor(); -} - Memory::Memory& System::ApplicationMemory() { - return impl->memory; + return impl->kernel.ApplicationProcess()->GetMemory(); } const Core::Memory::Memory& System::ApplicationMemory() const { - return impl->memory; + return impl->kernel.ApplicationProcess()->GetMemory(); } Tegra::GPU& System::GPU() { diff --git a/src/core/core.h b/src/core/core.h index 473204db7..ba5add0dc 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -116,7 +116,6 @@ class CpuManager; class Debugger; class DeviceMemory; class ExclusiveMonitor; -class GPUDirtyMemoryManager; class PerfStats; class Reporter; class SpeedLimiter; @@ -225,12 +224,6 @@ public: /// Prepare the core emulation for a reschedule void PrepareReschedule(u32 core_index); - /// Provides a reference to the gou dirty memory manager. - [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager(); - - /// Provides a constant reference to the current gou dirty memory manager. - [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const; - void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); [[nodiscard]] size_t GetCurrentHostThreadID() const; @@ -250,12 +243,6 @@ public: /// Gets a const reference to the underlying CPU manager [[nodiscard]] const CpuManager& GetCpuManager() const; - /// Gets a reference to the exclusive monitor - [[nodiscard]] ExclusiveMonitor& Monitor(); - - /// Gets a constant reference to the exclusive monitor - [[nodiscard]] const ExclusiveMonitor& Monitor() const; - /// Gets a mutable reference to the system memory instance. [[nodiscard]] Core::Memory::Memory& ApplicationMemory(); diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index 763a44fee..539c7f7af 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp @@ -166,6 +166,10 @@ u32 ProgramMetadata::GetSystemResourceSize() const { return npdm_header.system_resource_size; } +PoolPartition ProgramMetadata::GetPoolPartition() const { + return acid_header.pool_partition; +} + const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { return aci_kernel_capabilities; } @@ -201,7 +205,7 @@ void ProgramMetadata::Print() const { // Begin ACID printing (potential perms, signed) LOG_DEBUG(Service_FS, "Magic: {:.4}", acid_header.magic.data()); LOG_DEBUG(Service_FS, "Flags: 0x{:02X}", acid_header.flags); - LOG_DEBUG(Service_FS, " > Is Retail: {}", acid_header.is_retail ? "YES" : "NO"); + LOG_DEBUG(Service_FS, " > Is Retail: {}", acid_header.production_flag ? "YES" : "NO"); LOG_DEBUG(Service_FS, "Title ID Min: 0x{:016X}", acid_header.title_id_min); LOG_DEBUG(Service_FS, "Title ID Max: 0x{:016X}", acid_header.title_id_max); LOG_DEBUG(Service_FS, "Filesystem Access: 0x{:016X}\n", acid_file_access.permissions); diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index 76ee97d78..a53092b87 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h @@ -34,6 +34,13 @@ enum class ProgramFilePermission : u64 { Everything = 1ULL << 63, }; +enum class PoolPartition : u32 { + Application = 0, + Applet = 1, + System = 2, + SystemNonSecure = 3, +}; + /** * Helper which implements an interface to parse Program Description Metadata (NPDM) * Data can either be loaded from a file path or with data and an offset into it. @@ -72,6 +79,7 @@ public: u64 GetTitleID() const; u64 GetFilesystemPermissions() const; u32 GetSystemResourceSize() const; + PoolPartition GetPoolPartition() const; const KernelCapabilityDescriptors& GetKernelCapabilities() const; const std::array<u8, 0x10>& GetName() const { return npdm_header.application_name; @@ -116,8 +124,9 @@ private: union { u32 flags; - BitField<0, 1, u32> is_retail; - BitField<1, 31, u32> flags_unk; + BitField<0, 1, u32> production_flag; + BitField<1, 1, u32> unqualified_approval; + BitField<2, 4, PoolPartition> pool_partition; }; u64_le title_id_min; u64_le title_id_max; diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index 78d43d729..48889253d 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -4,6 +4,7 @@ #include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/hle/kernel/k_address_arbiter.h" +#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/k_thread.h" @@ -26,9 +27,9 @@ bool ReadFromUser(KernelCore& kernel, s32* out, KProcessAddress address) { return true; } -bool DecrementIfLessThan(Core::System& system, s32* out, KProcessAddress address, s32 value) { - auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); +bool DecrementIfLessThan(KernelCore& kernel, s32* out, KProcessAddress address, s32 value) { + auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor(); + const auto current_core = kernel.CurrentPhysicalCoreIndex(); // NOTE: If scheduler lock is not held here, interrupt disable is required. // KScopedInterruptDisable di; @@ -66,10 +67,10 @@ bool DecrementIfLessThan(Core::System& system, s32* out, KProcessAddress address return true; } -bool UpdateIfEqual(Core::System& system, s32* out, KProcessAddress address, s32 value, +bool UpdateIfEqual(KernelCore& kernel, s32* out, KProcessAddress address, s32 value, s32 new_value) { - auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); + auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor(); + const auto current_core = kernel.CurrentPhysicalCoreIndex(); // NOTE: If scheduler lock is not held here, interrupt disable is required. // KScopedInterruptDisable di; @@ -159,7 +160,7 @@ Result KAddressArbiter::SignalAndIncrementIfEqual(uint64_t addr, s32 value, s32 // Check the userspace value. s32 user_value{}; - R_UNLESS(UpdateIfEqual(m_system, std::addressof(user_value), addr, value, value + 1), + R_UNLESS(UpdateIfEqual(m_kernel, std::addressof(user_value), addr, value, value + 1), ResultInvalidCurrentMemory); R_UNLESS(user_value == value, ResultInvalidState); @@ -219,7 +220,7 @@ Result KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(uint64_t addr, s32 s32 user_value{}; bool succeeded{}; if (value != new_value) { - succeeded = UpdateIfEqual(m_system, std::addressof(user_value), addr, value, new_value); + succeeded = UpdateIfEqual(m_kernel, std::addressof(user_value), addr, value, new_value); } else { succeeded = ReadFromUser(m_kernel, std::addressof(user_value), addr); } @@ -262,7 +263,7 @@ Result KAddressArbiter::WaitIfLessThan(uint64_t addr, s32 value, bool decrement, s32 user_value{}; bool succeeded{}; if (decrement) { - succeeded = DecrementIfLessThan(m_system, std::addressof(user_value), addr, value); + succeeded = DecrementIfLessThan(m_kernel, std::addressof(user_value), addr, value); } else { succeeded = ReadFromUser(m_kernel, std::addressof(user_value), addr); } diff --git a/src/core/hle/kernel/k_client_port.cpp b/src/core/hle/kernel/k_client_port.cpp index 11b1b977e..68cea978a 100644 --- a/src/core/hle/kernel/k_client_port.cpp +++ b/src/core/hle/kernel/k_client_port.cpp @@ -58,9 +58,8 @@ Result KClientPort::CreateSession(KClientSession** out) { KSession* session{}; // Reserve a new session from the resource limit. - //! FIXME: we are reserving this from the wrong resource limit! - KScopedResourceReservation session_reservation( - m_kernel.ApplicationProcess()->GetResourceLimit(), LimitableResource::SessionCountMax); + KScopedResourceReservation session_reservation(GetCurrentProcessPointer(m_kernel), + LimitableResource::SessionCountMax); R_UNLESS(session_reservation.Succeeded(), ResultLimitReached); // Allocate a session normally. diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index 7633a51fb..94ea3527a 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -28,10 +28,10 @@ bool WriteToUser(KernelCore& kernel, KProcessAddress address, const u32* p) { return true; } -bool UpdateLockAtomic(Core::System& system, u32* out, KProcessAddress address, u32 if_zero, +bool UpdateLockAtomic(KernelCore& kernel, u32* out, KProcessAddress address, u32 if_zero, u32 new_orr_mask) { - auto& monitor = system.Monitor(); - const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); + auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor(); + const auto current_core = kernel.CurrentPhysicalCoreIndex(); u32 expected{}; @@ -208,7 +208,7 @@ void KConditionVariable::SignalImpl(KThread* thread) { // TODO(bunnei): We should call CanAccessAtomic(..) here. can_access = true; if (can_access) [[likely]] { - UpdateLockAtomic(m_system, std::addressof(prev_tag), address, own_tag, + UpdateLockAtomic(m_kernel, std::addressof(prev_tag), address, own_tag, Svc::HandleWaitMask); } } diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h index d7660630c..4e6dcd66b 100644 --- a/src/core/hle/kernel/k_handle_table.h +++ b/src/core/hle/kernel/k_handle_table.h @@ -30,7 +30,7 @@ public: public: explicit KHandleTable(KernelCore& kernel) : m_kernel(kernel) {} - Result Initialize(s32 size) { + Result Initialize(KProcess* owner, s32 size) { // Check that the table size is valid. R_UNLESS(size <= static_cast<s32>(MaxTableSize), ResultOutOfMemory); @@ -44,6 +44,7 @@ public: m_next_linear_id = MinLinearId; m_count = 0; m_free_head_index = -1; + m_owner = owner; // Free all entries. for (s32 i = 0; i < static_cast<s32>(m_table_size); ++i) { @@ -90,8 +91,8 @@ public: // Handle pseudo-handles. if constexpr (std::derived_from<KProcess, T>) { if (handle == Svc::PseudoHandle::CurrentProcess) { - //! FIXME: this is the wrong process! - auto* const cur_process = m_kernel.ApplicationProcess(); + // TODO: this should be the current process + auto* const cur_process = m_owner; ASSERT(cur_process != nullptr); return cur_process; } @@ -301,6 +302,7 @@ private: private: KernelCore& m_kernel; + KProcess* m_owner{}; std::array<EntryInfo, MaxTableSize> m_entry_infos{}; std::array<KAutoObject*, MaxTableSize> m_objects{}; mutable KSpinLock m_lock; diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 3a2635e1f..d6869c228 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -306,12 +306,16 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, params.code_address, params.code_num_pages * PageSize, - m_system_resource, res_limit, this->GetMemory(), 0)); + m_system_resource, res_limit, m_memory, 0)); } ON_RESULT_FAILURE_2 { m_page_table.Finalize(); }; + // Ensure our memory is initialized. + m_memory.SetCurrentPageTable(*this); + m_memory.SetGPUDirtyManagers(m_dirty_memory_managers); + // Ensure we can insert the code region. R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize, KMemoryState::Code), @@ -399,12 +403,16 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, params.code_address, code_size, m_system_resource, res_limit, - this->GetMemory(), aslr_space_start)); + m_memory, aslr_space_start)); } ON_RESULT_FAILURE_2 { m_page_table.Finalize(); }; + // Ensure our memory is initialized. + m_memory.SetCurrentPageTable(*this); + m_memory.SetGPUDirtyManagers(m_dirty_memory_managers); + // Ensure we can insert the code region. R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code), ResultInvalidMemoryRegion); @@ -1094,8 +1102,7 @@ void KProcess::UnpinThread(KThread* thread) { Result KProcess::GetThreadList(s32* out_num_threads, KProcessAddress out_thread_ids, s32 max_out_count) { - // TODO: use current memory reference - auto& memory = m_kernel.System().ApplicationMemory(); + auto& memory = this->GetMemory(); // Lock the list. KScopedLightLock lk(m_list_lock); @@ -1128,14 +1135,15 @@ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {} KProcess::KProcess(KernelCore& kernel) : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel}, m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()}, - m_handle_table{kernel} {} + m_handle_table{kernel}, m_dirty_memory_managers{}, + m_exclusive_monitor{}, m_memory{kernel.System()} {} KProcess::~KProcess() = default; Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, KProcessAddress aslr_space_start, bool is_hbl) { // Create a resource limit for the process. - const auto physical_memory_size = - m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application); + const auto pool = static_cast<KMemoryManager::Pool>(metadata.GetPoolPartition()); + const auto physical_memory_size = m_kernel.MemoryManager().GetSize(pool); auto* res_limit = Kernel::CreateResourceLimitForProcess(m_kernel.System(), physical_memory_size); @@ -1146,8 +1154,10 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std: Svc::CreateProcessFlag flag{}; u64 code_address{}; - // We are an application. - flag |= Svc::CreateProcessFlag::IsApplication; + // Determine if we are an application. + if (pool == KMemoryManager::Pool::Application) { + flag |= Svc::CreateProcessFlag::IsApplication; + } // If we are 64-bit, create as such. if (metadata.Is64BitProgram()) { @@ -1196,8 +1206,8 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std: std::memcpy(params.name.data(), name.data(), sizeof(params.name)); // Initialize for application process. - R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit, - KMemoryManager::Pool::Application, aslr_space_start)); + R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit, pool, + aslr_space_start)); // Assign remaining properties. m_is_hbl = is_hbl; @@ -1223,7 +1233,7 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) { ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); #ifdef HAS_NCE - if (Settings::IsNceEnabled()) { + if (this->IsApplication() && Settings::IsNceEnabled()) { auto& buffer = m_kernel.System().DeviceMemory().buffer; const auto& code = code_set.CodeSegment(); const auto& patch = code_set.PatchSegment(); @@ -1235,10 +1245,11 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) { } void KProcess::InitializeInterfaces() { - this->GetMemory().SetCurrentPageTable(*this); + m_exclusive_monitor = + Core::MakeExclusiveMonitor(this->GetMemory(), Core::Hardware::NUM_CPU_CORES); #ifdef HAS_NCE - if (this->Is64Bit() && Settings::IsNceEnabled()) { + if (this->IsApplication() && Settings::IsNceEnabled()) { for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i); } @@ -1248,13 +1259,13 @@ void KProcess::InitializeInterfaces() { for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { m_arm_interfaces[i] = std::make_unique<Core::ArmDynarmic64>( m_kernel.System(), m_kernel.IsMulticore(), this, - static_cast<Core::DynarmicExclusiveMonitor&>(m_kernel.GetExclusiveMonitor()), i); + static_cast<Core::DynarmicExclusiveMonitor&>(*m_exclusive_monitor), i); } } else { for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { m_arm_interfaces[i] = std::make_unique<Core::ArmDynarmic32>( m_kernel.System(), m_kernel.IsMulticore(), this, - static_cast<Core::DynarmicExclusiveMonitor&>(m_kernel.GetExclusiveMonitor()), i); + static_cast<Core::DynarmicExclusiveMonitor&>(*m_exclusive_monitor), i); } } } @@ -1305,9 +1316,10 @@ bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointT return true; } -Core::Memory::Memory& KProcess::GetMemory() const { - // TODO: per-process memory - return m_kernel.System().ApplicationMemory(); +void KProcess::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { + for (auto& manager : m_dirty_memory_managers) { + manager.Gather(callback); + } } } // namespace Kernel diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 4b114e39b..b5c6867a1 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -7,6 +7,7 @@ #include "core/arm/arm_interface.h" #include "core/file_sys/program_metadata.h" +#include "core/gpu_dirty_memory_manager.h" #include "core/hle/kernel/code_set.h" #include "core/hle/kernel/k_address_arbiter.h" #include "core/hle/kernel/k_capabilities.h" @@ -17,6 +18,7 @@ #include "core/hle/kernel/k_system_resource.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/k_thread_local_page.h" +#include "core/memory.h" namespace Kernel { @@ -126,6 +128,9 @@ private: #ifdef HAS_NCE std::unordered_map<u64, u64> m_post_handlers{}; #endif + std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> m_dirty_memory_managers; + std::unique_ptr<Core::ExclusiveMonitor> m_exclusive_monitor; + Core::Memory::Memory m_memory; private: Result StartTermination(); @@ -502,7 +507,15 @@ public: void InitializeInterfaces(); - Core::Memory::Memory& GetMemory() const; + Core::Memory::Memory& GetMemory() { + return m_memory; + } + + void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); + + Core::ExclusiveMonitor& GetExclusiveMonitor() const { + return *m_exclusive_monitor; + } public: // Overridden parent functions. @@ -539,7 +552,7 @@ private: Result InitializeHandleTable(s32 size) { // Try to initialize the handle table. - R_TRY(m_handle_table.Initialize(size)); + R_TRY(m_handle_table.Initialize(this, size)); // We succeeded, so note that we did. m_is_handle_table_initialized = true; diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp index e33a88e24..f6ca3dc48 100644 --- a/src/core/hle/kernel/k_server_session.cpp +++ b/src/core/hle/kernel/k_server_session.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/scratch_buffer.h" #include "core/core.h" #include "core/core_timing.h" #include "core/hle/kernel/k_client_port.h" @@ -29,12 +30,138 @@ namespace Kernel { namespace { +constexpr inline size_t PointerTransferBufferAlignment = 0x10; +constexpr inline size_t ReceiveListDataSize = + MessageBuffer::MessageHeader::ReceiveListCountType_CountMax * + MessageBuffer::ReceiveListEntry::GetDataSize() / sizeof(u32); + +using ThreadQueueImplForKServerSessionRequest = KThreadQueue; + +class ReceiveList { +public: + static constexpr int GetEntryCount(const MessageBuffer::MessageHeader& header) { + const auto count = header.GetReceiveListCount(); + switch (count) { + case MessageBuffer::MessageHeader::ReceiveListCountType_None: + return 0; + case MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer: + return 0; + case MessageBuffer::MessageHeader::ReceiveListCountType_ToSingleBuffer: + return 1; + default: + return count - MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset; + } + } + + explicit ReceiveList(const u32* dst_msg, uint64_t dst_address, + KProcessPageTable& dst_page_table, + const MessageBuffer::MessageHeader& dst_header, + const MessageBuffer::SpecialHeader& dst_special_header, size_t msg_size, + size_t out_offset, s32 dst_recv_list_idx, bool is_tls) { + m_recv_list_count = dst_header.GetReceiveListCount(); + m_msg_buffer_end = dst_address + sizeof(u32) * out_offset; + m_msg_buffer_space_end = dst_address + msg_size; + + // NOTE: Nintendo calculates the receive list index here using the special header. + // We pre-calculate it in the caller, and pass it as a parameter. + (void)dst_special_header; + + const u32* recv_list = dst_msg + dst_recv_list_idx; + const auto entry_count = GetEntryCount(dst_header); + + if (is_tls) { + // Messages from TLS to TLS are contained within one page. + std::memcpy(m_data.data(), recv_list, + entry_count * MessageBuffer::ReceiveListEntry::GetDataSize()); + } else { + // If any buffer is not from TLS, perform a normal read instead. + uint64_t cur_addr = dst_address + dst_recv_list_idx * sizeof(u32); + dst_page_table.GetMemory().ReadBlock( + cur_addr, m_data.data(), + entry_count * MessageBuffer::ReceiveListEntry::GetDataSize()); + } + } + + bool IsIndex() const { + return m_recv_list_count > + static_cast<s32>(MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset); + } + + bool IsToMessageBuffer() const { + return m_recv_list_count == + MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer; + } + + void GetBuffer(uint64_t& out, size_t size, int& key) const { + switch (m_recv_list_count) { + case MessageBuffer::MessageHeader::ReceiveListCountType_None: { + out = 0; + break; + } + case MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer: { + const uint64_t buf = + Common::AlignUp(m_msg_buffer_end + key, PointerTransferBufferAlignment); + + if ((buf < buf + size) && (buf + size <= m_msg_buffer_space_end)) { + out = buf; + key = static_cast<int>(buf + size - m_msg_buffer_end); + } else { + out = 0; + } + break; + } + case MessageBuffer::MessageHeader::ReceiveListCountType_ToSingleBuffer: { + const MessageBuffer::ReceiveListEntry entry(m_data[0], m_data[1]); + const uint64_t buf = + Common::AlignUp(entry.GetAddress() + key, PointerTransferBufferAlignment); + + const uint64_t entry_addr = entry.GetAddress(); + const size_t entry_size = entry.GetSize(); + + if ((buf < buf + size) && (entry_addr < entry_addr + entry_size) && + (buf + size <= entry_addr + entry_size)) { + out = buf; + key = static_cast<int>(buf + size - entry_addr); + } else { + out = 0; + } + break; + } + default: { + if (key < m_recv_list_count - + static_cast<s32>( + MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset)) { + const MessageBuffer::ReceiveListEntry entry(m_data[2 * key + 0], + m_data[2 * key + 1]); + + const uintptr_t entry_addr = entry.GetAddress(); + const size_t entry_size = entry.GetSize(); + + if ((entry_addr < entry_addr + entry_size) && (entry_size >= size)) { + out = entry_addr; + } + } else { + out = 0; + } + break; + } + } + } + +private: + std::array<u32, ReceiveListDataSize> m_data; + s32 m_recv_list_count; + uint64_t m_msg_buffer_end; + uint64_t m_msg_buffer_space_end; +}; + template <bool MoveHandleAllowed> -Result ProcessMessageSpecialData(KProcess& dst_process, KProcess& src_process, KThread& src_thread, - MessageBuffer& dst_msg, const MessageBuffer& src_msg, - MessageBuffer::SpecialHeader& src_special_header) { +Result ProcessMessageSpecialData(s32& offset, KProcess& dst_process, KProcess& src_process, + KThread& src_thread, const MessageBuffer& dst_msg, + const MessageBuffer& src_msg, + const MessageBuffer::SpecialHeader& src_special_header) { // Copy the special header to the destination. - s32 offset = dst_msg.Set(src_special_header); + offset = dst_msg.Set(src_special_header); // Copy the process ID. if (src_special_header.GetHasProcessId()) { @@ -110,6 +237,102 @@ Result ProcessMessageSpecialData(KProcess& dst_process, KProcess& src_process, K R_RETURN(result); } +Result ProcessReceiveMessagePointerDescriptors(int& offset, int& pointer_key, + KProcessPageTable& dst_page_table, + KProcessPageTable& src_page_table, + const MessageBuffer& dst_msg, + const MessageBuffer& src_msg, + const ReceiveList& dst_recv_list, bool dst_user) { + // Get the offset at the start of processing. + const int cur_offset = offset; + + // Get the pointer desc. + MessageBuffer::PointerDescriptor src_desc(src_msg, cur_offset); + offset += static_cast<int>(MessageBuffer::PointerDescriptor::GetDataSize() / sizeof(u32)); + + // Extract address/size. + const uint64_t src_pointer = src_desc.GetAddress(); + const size_t recv_size = src_desc.GetSize(); + uint64_t recv_pointer = 0; + + // Process the buffer, if it has a size. + if (recv_size > 0) { + // If using indexing, set index. + if (dst_recv_list.IsIndex()) { + pointer_key = src_desc.GetIndex(); + } + + // Get the buffer. + dst_recv_list.GetBuffer(recv_pointer, recv_size, pointer_key); + R_UNLESS(recv_pointer != 0, ResultOutOfResource); + + // Perform the pointer data copy. + if (dst_user) { + R_TRY(src_page_table.CopyMemoryFromHeapToHeapWithoutCheckDestination( + dst_page_table, recv_pointer, recv_size, KMemoryState::FlagReferenceCounted, + KMemoryState::FlagReferenceCounted, + KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite, + KMemoryAttribute::Uncached | KMemoryAttribute::Locked, KMemoryAttribute::Locked, + src_pointer, KMemoryState::FlagLinearMapped, KMemoryState::FlagLinearMapped, + KMemoryPermission::UserRead, KMemoryAttribute::Uncached, KMemoryAttribute::None)); + } else { + R_TRY(src_page_table.CopyMemoryFromLinearToUser( + recv_pointer, recv_size, src_pointer, KMemoryState::FlagLinearMapped, + KMemoryState::FlagLinearMapped, KMemoryPermission::UserRead, + KMemoryAttribute::Uncached, KMemoryAttribute::None)); + } + } + + // Set the output descriptor. + dst_msg.Set(cur_offset, MessageBuffer::PointerDescriptor(reinterpret_cast<void*>(recv_pointer), + recv_size, src_desc.GetIndex())); + + R_SUCCEED(); +} + +constexpr Result GetMapAliasMemoryState(KMemoryState& out, + MessageBuffer::MapAliasDescriptor::Attribute attr) { + switch (attr) { + case MessageBuffer::MapAliasDescriptor::Attribute::Ipc: + out = KMemoryState::Ipc; + break; + case MessageBuffer::MapAliasDescriptor::Attribute::NonSecureIpc: + out = KMemoryState::NonSecureIpc; + break; + case MessageBuffer::MapAliasDescriptor::Attribute::NonDeviceIpc: + out = KMemoryState::NonDeviceIpc; + break; + default: + R_THROW(ResultInvalidCombination); + } + + R_SUCCEED(); +} + +constexpr Result GetMapAliasTestStateAndAttributeMask(KMemoryState& out_state, + KMemoryAttribute& out_attr_mask, + KMemoryState state) { + switch (state) { + case KMemoryState::Ipc: + out_state = KMemoryState::FlagCanUseIpc; + out_attr_mask = + KMemoryAttribute::Uncached | KMemoryAttribute::DeviceShared | KMemoryAttribute::Locked; + break; + case KMemoryState::NonSecureIpc: + out_state = KMemoryState::FlagCanUseNonSecureIpc; + out_attr_mask = KMemoryAttribute::Uncached | KMemoryAttribute::Locked; + break; + case KMemoryState::NonDeviceIpc: + out_state = KMemoryState::FlagCanUseNonDeviceIpc; + out_attr_mask = KMemoryAttribute::Uncached | KMemoryAttribute::Locked; + break; + default: + R_THROW(ResultInvalidCombination); + } + + R_SUCCEED(); +} + void CleanupSpecialData(KProcess& dst_process, u32* dst_msg_ptr, size_t dst_buffer_size) { // Parse the message. const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size); @@ -144,166 +367,856 @@ void CleanupSpecialData(KProcess& dst_process, u32* dst_msg_ptr, size_t dst_buff } } -} // namespace +Result CleanupServerHandles(KernelCore& kernel, uint64_t message, size_t buffer_size, + KPhysicalAddress message_paddr) { + // Server is assumed to be current thread. + KThread& thread = GetCurrentThread(kernel); -using ThreadQueueImplForKServerSessionRequest = KThreadQueue; + // Get the linear message pointer. + u32* msg_ptr; + if (message) { + msg_ptr = kernel.System().DeviceMemory().GetPointer<u32>(message_paddr); + } else { + msg_ptr = GetCurrentMemory(kernel).GetPointer<u32>(thread.GetTlsAddress()); + buffer_size = MessageBufferSize; + message = GetInteger(thread.GetTlsAddress()); + } -KServerSession::KServerSession(KernelCore& kernel) - : KSynchronizationObject{kernel}, m_lock{m_kernel} {} + // Parse the message. + const MessageBuffer msg(msg_ptr, buffer_size); + const MessageBuffer::MessageHeader header(msg); + const MessageBuffer::SpecialHeader special_header(msg, header); -KServerSession::~KServerSession() = default; + // Check that the size is big enough. + R_UNLESS(MessageBuffer::GetMessageBufferSize(header, special_header) <= buffer_size, + ResultInvalidCombination); + + // If there's a special header, there may be move handles we need to close. + if (header.GetHasSpecialHeader()) { + // Determine the offset to the start of handles. + auto offset = msg.GetSpecialDataIndex(header, special_header); + if (special_header.GetHasProcessId()) { + offset += static_cast<int>(sizeof(u64) / sizeof(u32)); + } + if (auto copy_count = special_header.GetCopyHandleCount(); copy_count > 0) { + offset += static_cast<int>((sizeof(Svc::Handle) * copy_count) / sizeof(u32)); + } -void KServerSession::Destroy() { - m_parent->OnServerClosed(); + // Get the handle table. + auto& handle_table = thread.GetOwnerProcess()->GetHandleTable(); - this->CleanupRequests(); + // Close the handles. + for (auto i = 0; i < special_header.GetMoveHandleCount(); ++i) { + handle_table.Remove(msg.GetHandle(offset)); + offset += static_cast<int>(sizeof(Svc::Handle) / sizeof(u32)); + } + } - m_parent->Close(); + R_SUCCEED(); } -void KServerSession::OnClientClosed() { - KScopedLightLock lk{m_lock}; +Result CleanupServerMap(KSessionRequest* request, KProcess* server_process) { + // If there's no server process, there's nothing to clean up. + R_SUCCEED_IF(server_process == nullptr); - // Handle any pending requests. - KSessionRequest* prev_request = nullptr; - while (true) { - // Declare variables for processing the request. - KSessionRequest* request = nullptr; - KEvent* event = nullptr; - KThread* thread = nullptr; - bool cur_request = false; - bool terminate = false; + // Get the page table. + auto& server_page_table = server_process->GetPageTable(); - // Get the next request. - { - KScopedSchedulerLock sl{m_kernel}; + // Cleanup Send mappings. + for (size_t i = 0; i < request->GetSendCount(); ++i) { + R_TRY(server_page_table.CleanupForIpcServer(request->GetSendServerAddress(i), + request->GetSendSize(i), + request->GetSendMemoryState(i))); + } - if (m_current_request != nullptr && m_current_request != prev_request) { - // Set the request, open a reference as we process it. - request = m_current_request; - request->Open(); - cur_request = true; + // Cleanup Receive mappings. + for (size_t i = 0; i < request->GetReceiveCount(); ++i) { + R_TRY(server_page_table.CleanupForIpcServer(request->GetReceiveServerAddress(i), + request->GetReceiveSize(i), + request->GetReceiveMemoryState(i))); + } - // Get thread and event for the request. - thread = request->GetThread(); - event = request->GetEvent(); + // Cleanup Exchange mappings. + for (size_t i = 0; i < request->GetExchangeCount(); ++i) { + R_TRY(server_page_table.CleanupForIpcServer(request->GetExchangeServerAddress(i), + request->GetExchangeSize(i), + request->GetExchangeMemoryState(i))); + } - // If the thread is terminating, handle that. - if (thread->IsTerminationRequested()) { - request->ClearThread(); - request->ClearEvent(); - terminate = true; - } + R_SUCCEED(); +} - prev_request = request; - } else if (!m_request_list.empty()) { - // Pop the request from the front of the list. - request = std::addressof(m_request_list.front()); - m_request_list.pop_front(); +Result CleanupClientMap(KSessionRequest* request, KProcessPageTable* client_page_table) { + // If there's no client page table, there's nothing to clean up. + R_SUCCEED_IF(client_page_table == nullptr); - // Get thread and event for the request. - thread = request->GetThread(); - event = request->GetEvent(); - } + // Cleanup Send mappings. + for (size_t i = 0; i < request->GetSendCount(); ++i) { + R_TRY(client_page_table->CleanupForIpcClient(request->GetSendClientAddress(i), + request->GetSendSize(i), + request->GetSendMemoryState(i))); + } + + // Cleanup Receive mappings. + for (size_t i = 0; i < request->GetReceiveCount(); ++i) { + R_TRY(client_page_table->CleanupForIpcClient(request->GetReceiveClientAddress(i), + request->GetReceiveSize(i), + request->GetReceiveMemoryState(i))); + } + + // Cleanup Exchange mappings. + for (size_t i = 0; i < request->GetExchangeCount(); ++i) { + R_TRY(client_page_table->CleanupForIpcClient(request->GetExchangeClientAddress(i), + request->GetExchangeSize(i), + request->GetExchangeMemoryState(i))); + } + + R_SUCCEED(); +} + +Result CleanupMap(KSessionRequest* request, KProcess* server_process, + KProcessPageTable* client_page_table) { + // Cleanup the server map. + R_TRY(CleanupServerMap(request, server_process)); + + // Cleanup the client map. + R_TRY(CleanupClientMap(request, client_page_table)); + + R_SUCCEED(); +} + +Result ProcessReceiveMessageMapAliasDescriptors(int& offset, KProcessPageTable& dst_page_table, + KProcessPageTable& src_page_table, + const MessageBuffer& dst_msg, + const MessageBuffer& src_msg, + KSessionRequest* request, KMemoryPermission perm, + bool send) { + // Get the offset at the start of processing. + const int cur_offset = offset; + + // Get the map alias descriptor. + MessageBuffer::MapAliasDescriptor src_desc(src_msg, cur_offset); + offset += static_cast<int>(MessageBuffer::MapAliasDescriptor::GetDataSize() / sizeof(u32)); + + // Extract address/size. + const KProcessAddress src_address = src_desc.GetAddress(); + const size_t size = src_desc.GetSize(); + KProcessAddress dst_address = 0; + + // Determine the result memory state. + KMemoryState dst_state; + R_TRY(GetMapAliasMemoryState(dst_state, src_desc.GetAttribute())); + + // Process the buffer, if it has a size. + if (size > 0) { + // Set up the source pages for ipc. + R_TRY(dst_page_table.SetupForIpc(std::addressof(dst_address), size, src_address, + src_page_table, perm, dst_state, send)); + + // Ensure that we clean up on failure. + ON_RESULT_FAILURE { + dst_page_table.CleanupForIpcServer(dst_address, size, dst_state); + src_page_table.CleanupForIpcClient(src_address, size, dst_state); + }; + + // Push the appropriate mapping. + if (perm == KMemoryPermission::UserRead) { + R_TRY(request->PushSend(src_address, dst_address, size, dst_state)); + } else if (send) { + R_TRY(request->PushExchange(src_address, dst_address, size, dst_state)); + } else { + R_TRY(request->PushReceive(src_address, dst_address, size, dst_state)); } + } - // If there are no requests, we're done. - if (request == nullptr) { - break; + // Set the output descriptor. + dst_msg.Set(cur_offset, + MessageBuffer::MapAliasDescriptor(reinterpret_cast<void*>(GetInteger(dst_address)), + size, src_desc.GetAttribute())); + + R_SUCCEED(); +} + +Result ReceiveMessage(KernelCore& kernel, bool& recv_list_broken, uint64_t dst_message_buffer, + size_t dst_buffer_size, KPhysicalAddress dst_message_paddr, + KThread& src_thread, uint64_t src_message_buffer, size_t src_buffer_size, + KServerSession* session, KSessionRequest* request) { + // Prepare variables for receive. + KThread& dst_thread = GetCurrentThread(kernel); + KProcess& dst_process = *(dst_thread.GetOwnerProcess()); + KProcess& src_process = *(src_thread.GetOwnerProcess()); + auto& dst_page_table = dst_process.GetPageTable(); + auto& src_page_table = src_process.GetPageTable(); + + // NOTE: Session is used only for debugging, and so may go unused. + (void)session; + + // The receive list is initially not broken. + recv_list_broken = false; + + // Set the server process for the request. + request->SetServerProcess(std::addressof(dst_process)); + + // Determine the message buffers. + u32 *dst_msg_ptr, *src_msg_ptr; + bool dst_user, src_user; + + if (dst_message_buffer) { + dst_msg_ptr = kernel.System().DeviceMemory().GetPointer<u32>(dst_message_paddr); + dst_user = true; + } else { + dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_thread.GetTlsAddress()); + dst_buffer_size = MessageBufferSize; + dst_message_buffer = GetInteger(dst_thread.GetTlsAddress()); + dst_user = false; + } + + if (src_message_buffer) { + // NOTE: Nintendo does not check the result of this GetPhysicalAddress call. + src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_message_buffer); + src_user = true; + } else { + src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_thread.GetTlsAddress()); + src_buffer_size = MessageBufferSize; + src_message_buffer = GetInteger(src_thread.GetTlsAddress()); + src_user = false; + } + + // Parse the headers. + const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size); + const MessageBuffer src_msg(src_msg_ptr, src_buffer_size); + const MessageBuffer::MessageHeader dst_header(dst_msg); + const MessageBuffer::MessageHeader src_header(src_msg); + const MessageBuffer::SpecialHeader dst_special_header(dst_msg, dst_header); + const MessageBuffer::SpecialHeader src_special_header(src_msg, src_header); + + // Get the end of the source message. + const size_t src_end_offset = + MessageBuffer::GetRawDataIndex(src_header, src_special_header) + src_header.GetRawCount(); + + // Ensure that the headers fit. + R_UNLESS(MessageBuffer::GetMessageBufferSize(dst_header, dst_special_header) <= dst_buffer_size, + ResultInvalidCombination); + R_UNLESS(MessageBuffer::GetMessageBufferSize(src_header, src_special_header) <= src_buffer_size, + ResultInvalidCombination); + + // Ensure the receive list offset is after the end of raw data. + if (dst_header.GetReceiveListOffset()) { + R_UNLESS(dst_header.GetReceiveListOffset() >= + MessageBuffer::GetRawDataIndex(dst_header, dst_special_header) + + dst_header.GetRawCount(), + ResultInvalidCombination); + } + + // Ensure that the destination buffer is big enough to receive the source. + R_UNLESS(dst_buffer_size >= src_end_offset * sizeof(u32), ResultMessageTooLarge); + + // Get the receive list. + const s32 dst_recv_list_idx = + MessageBuffer::GetReceiveListIndex(dst_header, dst_special_header); + ReceiveList dst_recv_list(dst_msg_ptr, dst_message_buffer, dst_page_table, dst_header, + dst_special_header, dst_buffer_size, src_end_offset, + dst_recv_list_idx, !dst_user); + + // Ensure that the source special header isn't invalid. + const bool src_has_special_header = src_header.GetHasSpecialHeader(); + if (src_has_special_header) { + // Sending move handles from client -> server is not allowed. + R_UNLESS(src_special_header.GetMoveHandleCount() == 0, ResultInvalidCombination); + } + + // Prepare for further processing. + int pointer_key = 0; + int offset = dst_msg.Set(src_header); + + // Set up a guard to make sure that we end up in a clean state on error. + ON_RESULT_FAILURE { + // Cleanup mappings. + CleanupMap(request, std::addressof(dst_process), std::addressof(src_page_table)); + + // Cleanup special data. + if (src_header.GetHasSpecialHeader()) { + CleanupSpecialData(dst_process, dst_msg_ptr, dst_buffer_size); } - // All requests must have threads. - ASSERT(thread != nullptr); + // Cleanup the header if the receive list isn't broken. + if (!recv_list_broken) { + dst_msg.Set(dst_header); + if (dst_header.GetHasSpecialHeader()) { + dst_msg.Set(dst_special_header); + } + } + }; + + // Process any special data. + if (src_header.GetHasSpecialHeader()) { + // After we process, make sure we track whether the receive list is broken. + SCOPE_EXIT({ + if (offset > dst_recv_list_idx) { + recv_list_broken = true; + } + }); - // Ensure that we close the request when done. - SCOPE_EXIT({ request->Close(); }); + // Process special data. + R_TRY(ProcessMessageSpecialData<false>(offset, dst_process, src_process, src_thread, + dst_msg, src_msg, src_special_header)); + } - // If we're terminating, close a reference to the thread and event. - if (terminate) { - thread->Close(); - if (event != nullptr) { - event->Close(); + // Process any pointer buffers. + for (auto i = 0; i < src_header.GetPointerCount(); ++i) { + // After we process, make sure we track whether the receive list is broken. + SCOPE_EXIT({ + if (offset > dst_recv_list_idx) { + recv_list_broken = true; + } + }); + + R_TRY(ProcessReceiveMessagePointerDescriptors( + offset, pointer_key, dst_page_table, src_page_table, dst_msg, src_msg, dst_recv_list, + dst_user && dst_header.GetReceiveListCount() == + MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer)); + } + + // Process any map alias buffers. + for (auto i = 0; i < src_header.GetMapAliasCount(); ++i) { + // After we process, make sure we track whether the receive list is broken. + SCOPE_EXIT({ + if (offset > dst_recv_list_idx) { + recv_list_broken = true; + } + }); + + // We process in order send, recv, exch. Buffers after send (recv/exch) are ReadWrite. + const KMemoryPermission perm = (i >= src_header.GetSendCount()) + ? KMemoryPermission::UserReadWrite + : KMemoryPermission::UserRead; + + // Buffer is send if it is send or exch. + const bool send = (i < src_header.GetSendCount()) || + (i >= src_header.GetSendCount() + src_header.GetReceiveCount()); + + R_TRY(ProcessReceiveMessageMapAliasDescriptors(offset, dst_page_table, src_page_table, + dst_msg, src_msg, request, perm, send)); + } + + // Process any raw data. + if (const auto raw_count = src_header.GetRawCount(); raw_count != 0) { + // After we process, make sure we track whether the receive list is broken. + SCOPE_EXIT({ + if (offset + raw_count > dst_recv_list_idx) { + recv_list_broken = true; } + }); + + // Get the offset and size. + const size_t offset_words = offset * sizeof(u32); + const size_t raw_size = raw_count * sizeof(u32); + + if (!dst_user && !src_user) { + // Fast case is TLS -> TLS, do raw memcpy if we can. + std::memcpy(dst_msg_ptr + offset, src_msg_ptr + offset, raw_size); + } else if (dst_user) { + // Determine how much fast size we can copy. + const size_t max_fast_size = std::min<size_t>(offset_words + raw_size, PageSize); + const size_t fast_size = max_fast_size - offset_words; + + // Determine source state; if user buffer, we require heap, and otherwise only linear + // mapped (to enable tls use). + const auto src_state = + src_user ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped; + + // Determine the source permission. User buffer should be unmapped + read, TLS should be + // user readable. + const KMemoryPermission src_perm = static_cast<KMemoryPermission>( + src_user ? KMemoryPermission::NotMapped | KMemoryPermission::KernelRead + : KMemoryPermission::UserRead); + + // Perform the fast part of the copy. + R_TRY(src_page_table.CopyMemoryFromLinearToKernel( + dst_msg_ptr + offset, fast_size, src_message_buffer + offset_words, src_state, + src_state, src_perm, KMemoryAttribute::Uncached, KMemoryAttribute::None)); + + // If the fast part of the copy didn't get everything, perform the slow part of the + // copy. + if (fast_size < raw_size) { + R_TRY(src_page_table.CopyMemoryFromHeapToHeap( + dst_page_table, dst_message_buffer + max_fast_size, raw_size - fast_size, + KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted, + KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite, + KMemoryAttribute::Uncached | KMemoryAttribute::Locked, KMemoryAttribute::Locked, + src_message_buffer + max_fast_size, src_state, src_state, src_perm, + KMemoryAttribute::Uncached, KMemoryAttribute::None)); + } + } else /* if (src_user) */ { + // The source is a user buffer, so it should be unmapped + readable. + constexpr KMemoryPermission SourcePermission = static_cast<KMemoryPermission>( + KMemoryPermission::NotMapped | KMemoryPermission::KernelRead); + + // Copy the memory. + R_TRY(src_page_table.CopyMemoryFromLinearToUser( + dst_message_buffer + offset_words, raw_size, src_message_buffer + offset_words, + KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted, + SourcePermission, KMemoryAttribute::Uncached, KMemoryAttribute::None)); } + } - // If we need to, reply. - if (event != nullptr && !cur_request) { - // There must be no mappings. - ASSERT(request->GetSendCount() == 0); - ASSERT(request->GetReceiveCount() == 0); - ASSERT(request->GetExchangeCount() == 0); + // We succeeded! + R_SUCCEED(); +} - // // Get the process and page table. - // KProcess *client_process = thread->GetOwnerProcess(); - // auto& client_pt = client_process->GetPageTable(); +Result ProcessSendMessageReceiveMapping(KProcessPageTable& src_page_table, + KProcessPageTable& dst_page_table, + KProcessAddress client_address, + KProcessAddress server_address, size_t size, + KMemoryState src_state) { + // If the size is zero, there's nothing to process. + R_SUCCEED_IF(size == 0); + + // Get the memory state and attribute mask to test. + KMemoryState test_state; + KMemoryAttribute test_attr_mask; + R_TRY(GetMapAliasTestStateAndAttributeMask(test_state, test_attr_mask, src_state)); + + // Determine buffer extents. + KProcessAddress aligned_dst_start = Common::AlignDown(GetInteger(client_address), PageSize); + KProcessAddress aligned_dst_end = Common::AlignUp(GetInteger(client_address) + size, PageSize); + KProcessAddress mapping_dst_start = Common::AlignUp(GetInteger(client_address), PageSize); + KProcessAddress mapping_dst_end = + Common::AlignDown(GetInteger(client_address) + size, PageSize); + + KProcessAddress mapping_src_end = + Common::AlignDown(GetInteger(server_address) + size, PageSize); + + // If the start of the buffer is unaligned, handle that. + if (aligned_dst_start != mapping_dst_start) { + ASSERT(client_address < mapping_dst_start); + const size_t copy_size = std::min<size_t>(size, mapping_dst_start - client_address); + R_TRY(dst_page_table.CopyMemoryFromUserToLinear( + client_address, copy_size, test_state, test_state, KMemoryPermission::UserReadWrite, + test_attr_mask, KMemoryAttribute::None, server_address)); + } - // // Reply to the request. - // ReplyAsyncError(client_process, request->GetAddress(), request->GetSize(), - // ResultSessionClosed); + // If the end of the buffer is unaligned, handle that. + if (mapping_dst_end < aligned_dst_end && + (aligned_dst_start == mapping_dst_start || aligned_dst_start < mapping_dst_end)) { + const size_t copy_size = client_address + size - mapping_dst_end; + R_TRY(dst_page_table.CopyMemoryFromUserToLinear( + mapping_dst_end, copy_size, test_state, test_state, KMemoryPermission::UserReadWrite, + test_attr_mask, KMemoryAttribute::None, mapping_src_end)); + } - // // Unlock the buffer. - // // NOTE: Nintendo does not check the result of this. - // client_pt.UnlockForIpcUserBuffer(request->GetAddress(), request->GetSize()); + R_SUCCEED(); +} - // Signal the event. - event->Signal(); +Result ProcessSendMessagePointerDescriptors(int& offset, int& pointer_key, + KProcessPageTable& src_page_table, + KProcessPageTable& dst_page_table, + const MessageBuffer& dst_msg, + const MessageBuffer& src_msg, + const ReceiveList& dst_recv_list, bool dst_user) { + // Get the offset at the start of processing. + const int cur_offset = offset; + + // Get the pointer desc. + MessageBuffer::PointerDescriptor src_desc(src_msg, cur_offset); + offset += static_cast<int>(MessageBuffer::PointerDescriptor::GetDataSize() / sizeof(u32)); + + // Extract address/size. + const uint64_t src_pointer = src_desc.GetAddress(); + const size_t recv_size = src_desc.GetSize(); + uint64_t recv_pointer = 0; + + // Process the buffer, if it has a size. + if (recv_size > 0) { + // If using indexing, set index. + if (dst_recv_list.IsIndex()) { + pointer_key = src_desc.GetIndex(); } + + // Get the buffer. + dst_recv_list.GetBuffer(recv_pointer, recv_size, pointer_key); + R_UNLESS(recv_pointer != 0, ResultOutOfResource); + + // Perform the pointer data copy. + const bool dst_heap = dst_user && dst_recv_list.IsToMessageBuffer(); + const auto dst_state = + dst_heap ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped; + const KMemoryPermission dst_perm = + dst_heap ? KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite + : KMemoryPermission::UserReadWrite; + R_TRY(dst_page_table.CopyMemoryFromUserToLinear( + recv_pointer, recv_size, dst_state, dst_state, dst_perm, KMemoryAttribute::Uncached, + KMemoryAttribute::None, src_pointer)); } - // Notify. - this->NotifyAvailable(ResultSessionClosed); + // Set the output descriptor. + dst_msg.Set(cur_offset, MessageBuffer::PointerDescriptor(reinterpret_cast<void*>(recv_pointer), + recv_size, src_desc.GetIndex())); + + R_SUCCEED(); } -bool KServerSession::IsSignaled() const { - ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); +Result SendMessage(KernelCore& kernel, uint64_t src_message_buffer, size_t src_buffer_size, + KPhysicalAddress src_message_paddr, KThread& dst_thread, + uint64_t dst_message_buffer, size_t dst_buffer_size, KServerSession* session, + KSessionRequest* request) { + // Prepare variables for send. + KThread& src_thread = GetCurrentThread(kernel); + KProcess& dst_process = *(dst_thread.GetOwnerProcess()); + KProcess& src_process = *(src_thread.GetOwnerProcess()); + auto& dst_page_table = dst_process.GetPageTable(); + auto& src_page_table = src_process.GetPageTable(); + + // NOTE: Session is used only for debugging, and so may go unused. + (void)session; + + // Determine the message buffers. + u32 *dst_msg_ptr, *src_msg_ptr; + bool dst_user, src_user; + + if (dst_message_buffer) { + // NOTE: Nintendo does not check the result of this GetPhysicalAddress call. + dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_message_buffer); + dst_user = true; + } else { + dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_thread.GetTlsAddress()); + dst_buffer_size = MessageBufferSize; + dst_message_buffer = GetInteger(dst_thread.GetTlsAddress()); + dst_user = false; + } - // If the client is closed, we're always signaled. - if (m_parent->IsClientClosed()) { - return true; + if (src_message_buffer) { + src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_message_buffer); + src_user = true; + } else { + src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_thread.GetTlsAddress()); + src_buffer_size = MessageBufferSize; + src_message_buffer = GetInteger(src_thread.GetTlsAddress()); + src_user = false; } - // Otherwise, we're signaled if we have a request and aren't handling one. - return !m_request_list.empty() && m_current_request == nullptr; + // Parse the headers. + const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size); + const MessageBuffer src_msg(src_msg_ptr, src_buffer_size); + const MessageBuffer::MessageHeader dst_header(dst_msg); + const MessageBuffer::MessageHeader src_header(src_msg); + const MessageBuffer::SpecialHeader dst_special_header(dst_msg, dst_header); + const MessageBuffer::SpecialHeader src_special_header(src_msg, src_header); + + // Get the end of the source message. + const size_t src_end_offset = + MessageBuffer::GetRawDataIndex(src_header, src_special_header) + src_header.GetRawCount(); + + // Declare variables for processing. + int offset = 0; + int pointer_key = 0; + bool processed_special_data = false; + + // Send the message. + { + // Make sure that we end up in a clean state on error. + ON_RESULT_FAILURE { + // Cleanup special data. + if (processed_special_data) { + if (src_header.GetHasSpecialHeader()) { + CleanupSpecialData(dst_process, dst_msg_ptr, dst_buffer_size); + } + } else { + CleanupServerHandles(kernel, src_user ? src_message_buffer : 0, src_buffer_size, + src_message_paddr); + } + + // Cleanup mappings. + CleanupMap(request, std::addressof(src_process), std::addressof(dst_page_table)); + }; + + // Ensure that the headers fit. + R_UNLESS(MessageBuffer::GetMessageBufferSize(src_header, src_special_header) <= + src_buffer_size, + ResultInvalidCombination); + R_UNLESS(MessageBuffer::GetMessageBufferSize(dst_header, dst_special_header) <= + dst_buffer_size, + ResultInvalidCombination); + + // Ensure the receive list offset is after the end of raw data. + if (dst_header.GetReceiveListOffset()) { + R_UNLESS(dst_header.GetReceiveListOffset() >= + MessageBuffer::GetRawDataIndex(dst_header, dst_special_header) + + dst_header.GetRawCount(), + ResultInvalidCombination); + } + + // Ensure that the destination buffer is big enough to receive the source. + R_UNLESS(dst_buffer_size >= src_end_offset * sizeof(u32), ResultMessageTooLarge); + + // Replies must have no buffers. + R_UNLESS(src_header.GetSendCount() == 0, ResultInvalidCombination); + R_UNLESS(src_header.GetReceiveCount() == 0, ResultInvalidCombination); + R_UNLESS(src_header.GetExchangeCount() == 0, ResultInvalidCombination); + + // Get the receive list. + const s32 dst_recv_list_idx = + MessageBuffer::GetReceiveListIndex(dst_header, dst_special_header); + ReceiveList dst_recv_list(dst_msg_ptr, dst_message_buffer, dst_page_table, dst_header, + dst_special_header, dst_buffer_size, src_end_offset, + dst_recv_list_idx, !dst_user); + + // Handle any receive buffers. + for (size_t i = 0; i < request->GetReceiveCount(); ++i) { + R_TRY(ProcessSendMessageReceiveMapping( + src_page_table, dst_page_table, request->GetReceiveClientAddress(i), + request->GetReceiveServerAddress(i), request->GetReceiveSize(i), + request->GetReceiveMemoryState(i))); + } + + // Handle any exchange buffers. + for (size_t i = 0; i < request->GetExchangeCount(); ++i) { + R_TRY(ProcessSendMessageReceiveMapping( + src_page_table, dst_page_table, request->GetExchangeClientAddress(i), + request->GetExchangeServerAddress(i), request->GetExchangeSize(i), + request->GetExchangeMemoryState(i))); + } + + // Set the header. + offset = dst_msg.Set(src_header); + + // Process any special data. + ASSERT(GetCurrentThreadPointer(kernel) == std::addressof(src_thread)); + processed_special_data = true; + if (src_header.GetHasSpecialHeader()) { + R_TRY(ProcessMessageSpecialData<true>(offset, dst_process, src_process, src_thread, + dst_msg, src_msg, src_special_header)); + } + + // Process any pointer buffers. + for (auto i = 0; i < src_header.GetPointerCount(); ++i) { + R_TRY(ProcessSendMessagePointerDescriptors( + offset, pointer_key, src_page_table, dst_page_table, dst_msg, src_msg, + dst_recv_list, + dst_user && + dst_header.GetReceiveListCount() == + MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer)); + } + + // Clear any map alias buffers. + for (auto i = 0; i < src_header.GetMapAliasCount(); ++i) { + offset = dst_msg.Set(offset, MessageBuffer::MapAliasDescriptor()); + } + + // Process any raw data. + if (const auto raw_count = src_header.GetRawCount(); raw_count != 0) { + // Get the offset and size. + const size_t offset_words = offset * sizeof(u32); + const size_t raw_size = raw_count * sizeof(u32); + + if (!dst_user && !src_user) { + // Fast case is TLS -> TLS, do raw memcpy if we can. + std::memcpy(dst_msg_ptr + offset, src_msg_ptr + offset, raw_size); + } else if (src_user) { + // Determine how much fast size we can copy. + const size_t max_fast_size = std::min<size_t>(offset_words + raw_size, PageSize); + const size_t fast_size = max_fast_size - offset_words; + + // Determine dst state; if user buffer, we require heap, and otherwise only linear + // mapped (to enable tls use). + const auto dst_state = + dst_user ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped; + + // Determine the dst permission. User buffer should be unmapped + read, TLS should + // be user readable. + const KMemoryPermission dst_perm = + dst_user ? KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite + : KMemoryPermission::UserReadWrite; + + // Perform the fast part of the copy. + R_TRY(dst_page_table.CopyMemoryFromKernelToLinear( + dst_message_buffer + offset_words, fast_size, dst_state, dst_state, dst_perm, + KMemoryAttribute::Uncached, KMemoryAttribute::None, src_msg_ptr + offset)); + + // If the fast part of the copy didn't get everything, perform the slow part of the + // copy. + if (fast_size < raw_size) { + R_TRY(dst_page_table.CopyMemoryFromHeapToHeap( + dst_page_table, dst_message_buffer + max_fast_size, raw_size - fast_size, + dst_state, dst_state, dst_perm, KMemoryAttribute::Uncached, + KMemoryAttribute::None, src_message_buffer + max_fast_size, + KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted, + KMemoryPermission::NotMapped | KMemoryPermission::KernelRead, + KMemoryAttribute::Uncached | KMemoryAttribute::Locked, + KMemoryAttribute::Locked)); + } + } else /* if (dst_user) */ { + // The destination is a user buffer, so it should be unmapped + readable. + constexpr KMemoryPermission DestinationPermission = + KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite; + + // Copy the memory. + R_TRY(dst_page_table.CopyMemoryFromUserToLinear( + dst_message_buffer + offset_words, raw_size, KMemoryState::FlagReferenceCounted, + KMemoryState::FlagReferenceCounted, DestinationPermission, + KMemoryAttribute::Uncached, KMemoryAttribute::None, + src_message_buffer + offset_words)); + } + } + } + + // Perform (and validate) any remaining cleanup. + R_RETURN(CleanupMap(request, std::addressof(src_process), std::addressof(dst_page_table))); } -Result KServerSession::OnRequest(KSessionRequest* request) { - // Create the wait queue. - ThreadQueueImplForKServerSessionRequest wait_queue{m_kernel}; +void ReplyAsyncError(KProcess* to_process, uint64_t to_msg_buf, size_t to_msg_buf_size, + Result result) { + // Convert the address to a linear pointer. + u32* to_msg = to_process->GetMemory().GetPointer<u32>(to_msg_buf); + + // Set the error. + MessageBuffer msg(to_msg, to_msg_buf_size); + msg.SetAsyncResult(result); +} + +} // namespace + +KServerSession::KServerSession(KernelCore& kernel) + : KSynchronizationObject{kernel}, m_lock{m_kernel} {} + +KServerSession::~KServerSession() = default; + +void KServerSession::Destroy() { + m_parent->OnServerClosed(); + + this->CleanupRequests(); + + m_parent->Close(); +} + +Result KServerSession::ReceiveRequest(uintptr_t server_message, uintptr_t server_buffer_size, + KPhysicalAddress server_message_paddr, + std::shared_ptr<Service::HLERequestContext>* out_context, + std::weak_ptr<Service::SessionRequestManager> manager) { + // Lock the session. + KScopedLightLock lk{m_lock}; + + // Get the request and client thread. + KSessionRequest* request; + KThread* client_thread; { - // Lock the scheduler. KScopedSchedulerLock sl{m_kernel}; - // Ensure that we can handle new requests. - R_UNLESS(!m_parent->IsServerClosed(), ResultSessionClosed); + // Ensure that we can service the request. + R_UNLESS(!m_parent->IsClientClosed(), ResultSessionClosed); - // Check that we're not terminating. - R_UNLESS(!GetCurrentThread(m_kernel).IsTerminationRequested(), ResultTerminationRequested); + // Ensure we aren't already servicing a request. + R_UNLESS(m_current_request == nullptr, ResultNotFound); - // Get whether we're empty. - const bool was_empty = m_request_list.empty(); + // Ensure we have a request to service. + R_UNLESS(!m_request_list.empty(), ResultNotFound); - // Add the request to the list. - request->Open(); - m_request_list.push_back(*request); + // Pop the first request from the list. + request = std::addressof(m_request_list.front()); + m_request_list.pop_front(); - // If we were empty, signal. - if (was_empty) { - this->NotifyAvailable(); + // Get the thread for the request. + client_thread = request->GetThread(); + R_UNLESS(client_thread != nullptr, ResultSessionClosed); + + // Open the client thread. + client_thread->Open(); + } + + SCOPE_EXIT({ client_thread->Close(); }); + + // Set the request as our current. + m_current_request = request; + + // Get the client address. + uint64_t client_message = request->GetAddress(); + size_t client_buffer_size = request->GetSize(); + bool recv_list_broken = false; + + // Receive the message. + Result result = ResultSuccess; + + if (out_context != nullptr) { + // HLE request. + if (!client_message) { + client_message = GetInteger(client_thread->GetTlsAddress()); } + Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()}; + u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(client_message))}; + *out_context = + std::make_shared<Service::HLERequestContext>(m_kernel, memory, this, client_thread); + (*out_context)->SetSessionRequestManager(manager); + (*out_context) + ->PopulateFromIncomingCommandBuffer(*client_thread->GetOwnerProcess(), cmd_buf); + // We succeeded. + R_SUCCEED(); + } else { + result = ReceiveMessage(m_kernel, recv_list_broken, server_message, server_buffer_size, + server_message_paddr, *client_thread, client_message, + client_buffer_size, this, request); + } - // If we have a request event, this is asynchronous, and we don't need to wait. - R_SUCCEED_IF(request->GetEvent() != nullptr); + // Handle cleanup on receive failure. + if (R_FAILED(result)) { + // Cache the result to return it to the client. + const Result result_for_client = result; - // This is a synchronous request, so we should wait for our request to complete. - GetCurrentThread(m_kernel).SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC); - GetCurrentThread(m_kernel).BeginWait(std::addressof(wait_queue)); + // Clear the current request. + { + KScopedSchedulerLock sl(m_kernel); + ASSERT(m_current_request == request); + m_current_request = nullptr; + if (!m_request_list.empty()) { + this->NotifyAvailable(); + } + } + + // Reply to the client. + { + // After we reply, close our reference to the request. + SCOPE_EXIT({ request->Close(); }); + + // Get the event to check whether the request is async. + if (KEvent* event = request->GetEvent(); event != nullptr) { + // The client sent an async request. + KProcess* client = client_thread->GetOwnerProcess(); + auto& client_pt = client->GetPageTable(); + + // Send the async result. + if (R_FAILED(result_for_client)) { + ReplyAsyncError(client, client_message, client_buffer_size, result_for_client); + } + + // Unlock the client buffer. + // NOTE: Nintendo does not check the result of this. + client_pt.UnlockForIpcUserBuffer(client_message, client_buffer_size); + + // Signal the event. + event->Signal(); + } else { + // End the client thread's wait. + KScopedSchedulerLock sl(m_kernel); + + if (!client_thread->IsTerminationRequested()) { + client_thread->EndWait(result_for_client); + } + } + } + + // Set the server result. + if (recv_list_broken) { + result = ResultReceiveListBroken; + } else { + result = ResultNotFound; + } } - return GetCurrentThread(m_kernel).GetWaitResult(); + R_RETURN(result); } -Result KServerSession::SendReply(bool is_hle) { +Result KServerSession::SendReply(uintptr_t server_message, uintptr_t server_buffer_size, + KPhysicalAddress server_message_paddr, bool is_hle) { // Lock the session. KScopedLightLock lk{m_lock}; @@ -327,7 +1240,7 @@ Result KServerSession::SendReply(bool is_hle) { SCOPE_EXIT({ request->Close(); }); // Extract relevant information from the request. - const uintptr_t client_message = request->GetAddress(); + const uint64_t client_message = request->GetAddress(); const size_t client_buffer_size = request->GetSize(); KThread* client_thread = request->GetThread(); KEvent* event = request->GetEvent(); @@ -342,31 +1255,28 @@ Result KServerSession::SendReply(bool is_hle) { // HLE servers write directly to a pointer to the thread command buffer. Therefore // the reply has already been written in this case. } else { - Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()}; - KThread* server_thread = GetCurrentThreadPointer(m_kernel); - KProcess& src_process = *client_thread->GetOwnerProcess(); - KProcess& dst_process = *server_thread->GetOwnerProcess(); - UNIMPLEMENTED_IF(server_thread->GetOwnerProcess() != client_thread->GetOwnerProcess()); - - auto* src_msg_buffer = memory.GetPointer<u32>(server_thread->GetTlsAddress()); - auto* dst_msg_buffer = memory.GetPointer<u32>(client_message); - std::memcpy(dst_msg_buffer, src_msg_buffer, client_buffer_size); - - // Translate special header ad-hoc. - MessageBuffer src_msg(src_msg_buffer, client_buffer_size); - MessageBuffer::MessageHeader src_header(src_msg); - MessageBuffer::SpecialHeader src_special_header(src_msg, src_header); - if (src_header.GetHasSpecialHeader()) { - MessageBuffer dst_msg(dst_msg_buffer, client_buffer_size); - result = ProcessMessageSpecialData<true>(dst_process, src_process, *server_thread, - dst_msg, src_msg, src_special_header); - if (R_FAILED(result)) { - CleanupSpecialData(dst_process, dst_msg_buffer, client_buffer_size); - } - } + result = SendMessage(m_kernel, server_message, server_buffer_size, server_message_paddr, + *client_thread, client_message, client_buffer_size, this, request); + } + } else if (!is_hle) { + // Otherwise, we'll need to do some cleanup. + KProcess* server_process = request->GetServerProcess(); + KProcess* client_process = + (client_thread != nullptr) ? client_thread->GetOwnerProcess() : nullptr; + KProcessPageTable* client_page_table = + (client_process != nullptr) ? std::addressof(client_process->GetPageTable()) : nullptr; + + // Cleanup server handles. + result = CleanupServerHandles(m_kernel, server_message, server_buffer_size, + server_message_paddr); + + // Cleanup mappings. + Result cleanup_map_result = CleanupMap(request, server_process, client_page_table); + + // If we successfully cleaned up handles, use the map cleanup result as our result. + if (R_SUCCEEDED(result)) { + result = cleanup_map_result; } - } else { - result = ResultSessionClosed; } // Select a result for the client. @@ -381,19 +1291,18 @@ Result KServerSession::SendReply(bool is_hle) { // If there's a client thread, update it. if (client_thread != nullptr) { if (event != nullptr) { - // // Get the client process/page table. - // KProcess *client_process = client_thread->GetOwnerProcess(); - // KProcessPageTable *client_page_table = std::addressof(client_process->PageTable()); + // Get the client process/page table. + KProcess* client_process = client_thread->GetOwnerProcess(); + KProcessPageTable* client_page_table = std::addressof(client_process->GetPageTable()); - // // If we need to, reply with an async error. - // if (R_FAILED(client_result)) { - // ReplyAsyncError(client_process, client_message, client_buffer_size, - // client_result); - // } + // If we need to, reply with an async error. + if (R_FAILED(client_result)) { + ReplyAsyncError(client_process, client_message, client_buffer_size, client_result); + } - // // Unlock the client buffer. - // // NOTE: Nintendo does not check the result of this. - // client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size); + // Unlock the client buffer. + // NOTE: Nintendo does not check the result of this. + client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size); // Signal the event. event->Signal(); @@ -410,91 +1319,53 @@ Result KServerSession::SendReply(bool is_hle) { R_RETURN(result); } -Result KServerSession::ReceiveRequest(std::shared_ptr<Service::HLERequestContext>* out_context, - std::weak_ptr<Service::SessionRequestManager> manager) { - // Lock the session. - KScopedLightLock lk{m_lock}; - - // Get the request and client thread. - KSessionRequest* request; - KThread* client_thread; +Result KServerSession::OnRequest(KSessionRequest* request) { + // Create the wait queue. + ThreadQueueImplForKServerSessionRequest wait_queue{m_kernel}; { + // Lock the scheduler. KScopedSchedulerLock sl{m_kernel}; - // Ensure that we can service the request. - R_UNLESS(!m_parent->IsClientClosed(), ResultSessionClosed); - - // Ensure we aren't already servicing a request. - R_UNLESS(m_current_request == nullptr, ResultNotFound); + // Ensure that we can handle new requests. + R_UNLESS(!m_parent->IsServerClosed(), ResultSessionClosed); - // Ensure we have a request to service. - R_UNLESS(!m_request_list.empty(), ResultNotFound); + // Check that we're not terminating. + R_UNLESS(!GetCurrentThread(m_kernel).IsTerminationRequested(), ResultTerminationRequested); - // Pop the first request from the list. - request = std::addressof(m_request_list.front()); - m_request_list.pop_front(); + // Get whether we're empty. + const bool was_empty = m_request_list.empty(); - // Get the thread for the request. - client_thread = request->GetThread(); - R_UNLESS(client_thread != nullptr, ResultSessionClosed); + // Add the request to the list. + request->Open(); + m_request_list.push_back(*request); - // Open the client thread. - client_thread->Open(); - } + // If we were empty, signal. + if (was_empty) { + this->NotifyAvailable(); + } - SCOPE_EXIT({ client_thread->Close(); }); + // If we have a request event, this is asynchronous, and we don't need to wait. + R_SUCCEED_IF(request->GetEvent() != nullptr); - // Set the request as our current. - m_current_request = request; + // This is a synchronous request, so we should wait for our request to complete. + GetCurrentThread(m_kernel).SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC); + GetCurrentThread(m_kernel).BeginWait(std::addressof(wait_queue)); + } - // Get the client address. - uintptr_t client_message = request->GetAddress(); - size_t client_buffer_size = request->GetSize(); - // bool recv_list_broken = false; + return GetCurrentThread(m_kernel).GetWaitResult(); +} - if (!client_message) { - client_message = GetInteger(client_thread->GetTlsAddress()); - client_buffer_size = MessageBufferSize; - } +bool KServerSession::IsSignaled() const { + ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel)); - // Receive the message. - Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()}; - if (out_context != nullptr) { - // HLE request. - u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(client_message))}; - *out_context = - std::make_shared<Service::HLERequestContext>(m_kernel, memory, this, client_thread); - (*out_context)->SetSessionRequestManager(manager); - (*out_context) - ->PopulateFromIncomingCommandBuffer(*client_thread->GetOwnerProcess(), cmd_buf); - } else { - KThread* server_thread = GetCurrentThreadPointer(m_kernel); - KProcess& src_process = *client_thread->GetOwnerProcess(); - KProcess& dst_process = *server_thread->GetOwnerProcess(); - UNIMPLEMENTED_IF(client_thread->GetOwnerProcess() != server_thread->GetOwnerProcess()); - - auto* src_msg_buffer = memory.GetPointer<u32>(client_message); - auto* dst_msg_buffer = memory.GetPointer<u32>(server_thread->GetTlsAddress()); - std::memcpy(dst_msg_buffer, src_msg_buffer, client_buffer_size); - - // Translate special header ad-hoc. - // TODO: fix this mess - MessageBuffer src_msg(src_msg_buffer, client_buffer_size); - MessageBuffer::MessageHeader src_header(src_msg); - MessageBuffer::SpecialHeader src_special_header(src_msg, src_header); - if (src_header.GetHasSpecialHeader()) { - MessageBuffer dst_msg(dst_msg_buffer, client_buffer_size); - Result res = ProcessMessageSpecialData<false>(dst_process, src_process, *client_thread, - dst_msg, src_msg, src_special_header); - if (R_FAILED(res)) { - CleanupSpecialData(dst_process, dst_msg_buffer, client_buffer_size); - } - } + // If the client is closed, we're always signaled. + if (m_parent->IsClientClosed()) { + return true; } - // We succeeded. - R_SUCCEED(); + // Otherwise, we're signaled if we have a request and aren't handling one. + return !m_request_list.empty() && m_current_request == nullptr; } void KServerSession::CleanupRequests() { @@ -527,31 +1398,30 @@ void KServerSession::CleanupRequests() { SCOPE_EXIT({ request->Close(); }); // Extract relevant information from the request. - // const uintptr_t client_message = request->GetAddress(); - // const size_t client_buffer_size = request->GetSize(); + const uint64_t client_message = request->GetAddress(); + const size_t client_buffer_size = request->GetSize(); KThread* client_thread = request->GetThread(); KEvent* event = request->GetEvent(); - // KProcess *server_process = request->GetServerProcess(); - // KProcess *client_process = (client_thread != nullptr) ? - // client_thread->GetOwnerProcess() : nullptr; - // KProcessPageTable *client_page_table = (client_process != nullptr) ? - // std::addressof(client_process->GetPageTable()) - // : nullptr; + KProcess* server_process = request->GetServerProcess(); + KProcess* client_process = + (client_thread != nullptr) ? client_thread->GetOwnerProcess() : nullptr; + KProcessPageTable* client_page_table = + (client_process != nullptr) ? std::addressof(client_process->GetPageTable()) : nullptr; // Cleanup the mappings. - // Result result = CleanupMap(request, server_process, client_page_table); + Result result = CleanupMap(request, server_process, client_page_table); // If there's a client thread, update it. if (client_thread != nullptr) { if (event != nullptr) { - // // We need to reply async. - // ReplyAsyncError(client_process, client_message, client_buffer_size, - // (R_SUCCEEDED(result) ? ResultSessionClosed : result)); + // We need to reply async. + ReplyAsyncError(client_process, client_message, client_buffer_size, + (R_SUCCEEDED(result) ? ResultSessionClosed : result)); - // // Unlock the client buffer. + // Unlock the client buffer. // NOTE: Nintendo does not check the result of this. - // client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size); + client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size); // Signal the event. event->Signal(); @@ -567,4 +1437,97 @@ void KServerSession::CleanupRequests() { } } +void KServerSession::OnClientClosed() { + KScopedLightLock lk{m_lock}; + + // Handle any pending requests. + KSessionRequest* prev_request = nullptr; + while (true) { + // Declare variables for processing the request. + KSessionRequest* request = nullptr; + KEvent* event = nullptr; + KThread* thread = nullptr; + bool cur_request = false; + bool terminate = false; + + // Get the next request. + { + KScopedSchedulerLock sl{m_kernel}; + + if (m_current_request != nullptr && m_current_request != prev_request) { + // Set the request, open a reference as we process it. + request = m_current_request; + request->Open(); + cur_request = true; + + // Get thread and event for the request. + thread = request->GetThread(); + event = request->GetEvent(); + + // If the thread is terminating, handle that. + if (thread->IsTerminationRequested()) { + request->ClearThread(); + request->ClearEvent(); + terminate = true; + } + + prev_request = request; + } else if (!m_request_list.empty()) { + // Pop the request from the front of the list. + request = std::addressof(m_request_list.front()); + m_request_list.pop_front(); + + // Get thread and event for the request. + thread = request->GetThread(); + event = request->GetEvent(); + } + } + + // If there are no requests, we're done. + if (request == nullptr) { + break; + } + + // All requests must have threads. + ASSERT(thread != nullptr); + + // Ensure that we close the request when done. + SCOPE_EXIT({ request->Close(); }); + + // If we're terminating, close a reference to the thread and event. + if (terminate) { + thread->Close(); + if (event != nullptr) { + event->Close(); + } + } + + // If we need to, reply. + if (event != nullptr && !cur_request) { + // There must be no mappings. + ASSERT(request->GetSendCount() == 0); + ASSERT(request->GetReceiveCount() == 0); + ASSERT(request->GetExchangeCount() == 0); + + // Get the process and page table. + KProcess* client_process = thread->GetOwnerProcess(); + auto& client_pt = client_process->GetPageTable(); + + // Reply to the request. + ReplyAsyncError(client_process, request->GetAddress(), request->GetSize(), + ResultSessionClosed); + + // Unlock the buffer. + // NOTE: Nintendo does not check the result of this. + client_pt.UnlockForIpcUserBuffer(request->GetAddress(), request->GetSize()); + + // Signal the event. + event->Signal(); + } + } + + // Notify. + this->NotifyAvailable(ResultSessionClosed); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h index 403891919..2876c231b 100644 --- a/src/core/hle/kernel/k_server_session.h +++ b/src/core/hle/kernel/k_server_session.h @@ -49,14 +49,21 @@ public: bool IsSignaled() const override; void OnClientClosed(); - /// TODO: flesh these out to match the real kernel Result OnRequest(KSessionRequest* request); - Result SendReply(bool is_hle = false); - Result ReceiveRequest(std::shared_ptr<Service::HLERequestContext>* out_context = nullptr, + Result SendReply(uintptr_t server_message, uintptr_t server_buffer_size, + KPhysicalAddress server_message_paddr, bool is_hle = false); + Result ReceiveRequest(uintptr_t server_message, uintptr_t server_buffer_size, + KPhysicalAddress server_message_paddr, + std::shared_ptr<Service::HLERequestContext>* out_context = nullptr, std::weak_ptr<Service::SessionRequestManager> manager = {}); Result SendReplyHLE() { - return SendReply(true); + R_RETURN(this->SendReply(0, 0, 0, true)); + } + + Result ReceiveRequestHLE(std::shared_ptr<Service::HLERequestContext>* out_context, + std::weak_ptr<Service::SessionRequestManager> manager) { + R_RETURN(this->ReceiveRequest(0, 0, 0, out_context, manager)); } private: diff --git a/src/core/hle/kernel/k_session.cpp b/src/core/hle/kernel/k_session.cpp index 44d7a8f02..4a1f6027e 100644 --- a/src/core/hle/kernel/k_session.cpp +++ b/src/core/hle/kernel/k_session.cpp @@ -33,8 +33,7 @@ void KSession::Initialize(KClientPort* client_port, uintptr_t name) { m_name = name; // Set our owner process. - //! FIXME: this is the wrong process! - m_process = m_kernel.ApplicationProcess(); + m_process = GetCurrentProcessPointer(m_kernel); m_process->Open(); // Set our port. diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 7d9a6e9cf..24394d222 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -1422,8 +1422,7 @@ s32 GetCurrentCoreId(KernelCore& kernel) { } Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel) { - // TODO: per-process memory - return kernel.System().ApplicationMemory(); + return GetCurrentProcess(kernel).GetMemory(); } KScopedDisableDispatch::~KScopedDisableDispatch() { diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index e9925d231..f13e232b2 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -314,11 +314,7 @@ public: m_current_core_id = core; } - KProcess* GetOwnerProcess() { - return m_parent; - } - - const KProcess* GetOwnerProcess() const { + KProcess* GetOwnerProcess() const { return m_parent; } diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index e479dacde..c14d2d2f3 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -68,8 +68,6 @@ struct KernelCore::Impl { global_object_list_container = std::make_unique<KAutoObjectWithListContainer>(kernel); global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); - global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel); - global_handle_table->Initialize(KHandleTable::MaxTableSize); is_phantom_mode_for_singlecore = false; @@ -121,13 +119,8 @@ struct KernelCore::Impl { next_user_process_id = KProcess::ProcessIdMin; next_thread_id = 1; - global_handle_table->Finalize(); - global_handle_table.reset(); - preemption_event = nullptr; - exclusive_monitor.reset(); - // Cleanup persistent kernel objects auto CleanupObject = [](KAutoObject* obj) { if (obj) { @@ -191,8 +184,6 @@ struct KernelCore::Impl { } void InitializePhysicalCores() { - exclusive_monitor = - Core::MakeExclusiveMonitor(system.ApplicationMemory(), Core::Hardware::NUM_CPU_CORES); for (u32 i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { const s32 core{static_cast<s32>(i)}; @@ -791,10 +782,6 @@ struct KernelCore::Impl { std::shared_ptr<Core::Timing::EventType> preemption_event; - // This is the kernel's handle table or supervisor handle table which - // stores all the objects in place. - std::unique_ptr<KHandleTable> global_handle_table; - std::unique_ptr<KAutoObjectWithListContainer> global_object_list_container; std::unique_ptr<KObjectNameGlobalData> object_name_global_data; @@ -805,7 +792,6 @@ struct KernelCore::Impl { std::mutex server_lock; std::vector<std::unique_ptr<Service::ServerManager>> server_managers; - std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; std::array<std::unique_ptr<Kernel::PhysicalCore>, Core::Hardware::NUM_CPU_CORES> cores; // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others @@ -882,10 +868,6 @@ KResourceLimit* KernelCore::GetSystemResourceLimit() { return impl->system_resource_limit; } -KScopedAutoObject<KThread> KernelCore::RetrieveThreadFromGlobalHandleTable(Handle handle) const { - return impl->global_handle_table->GetObject<KThread>(handle); -} - void KernelCore::AppendNewProcess(KProcess* process) { impl->process_list.push_back(process); } @@ -959,14 +941,6 @@ Kernel::KHardwareTimer& KernelCore::HardwareTimer() { return *impl->hardware_timer; } -Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() { - return *impl->exclusive_monitor; -} - -const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const { - return *impl->exclusive_monitor; -} - KAutoObjectWithListContainer& KernelCore::ObjectListContainer() { return *impl->global_object_list_container; } @@ -1030,14 +1004,6 @@ u64 KernelCore::CreateNewUserProcessID() { return impl->next_user_process_id++; } -KHandleTable& KernelCore::GlobalHandleTable() { - return *impl->global_handle_table; -} - -const KHandleTable& KernelCore::GlobalHandleTable() const { - return *impl->global_handle_table; -} - void KernelCore::RegisterCoreThread(std::size_t core_id) { impl->RegisterCoreThread(core_id); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 78c88902c..5d4102145 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -116,9 +116,6 @@ public: /// Retrieves a shared pointer to the system resource limit instance. KResourceLimit* GetSystemResourceLimit(); - /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table. - KScopedAutoObject<KThread> RetrieveThreadFromGlobalHandleTable(Handle handle) const; - /// Adds the given shared pointer to an internal list of active processes. void AppendNewProcess(KProcess* process); @@ -170,10 +167,6 @@ public: /// Stops execution of 'id' core, in order to reschedule a new thread. void PrepareReschedule(std::size_t id); - Core::ExclusiveMonitor& GetExclusiveMonitor(); - - const Core::ExclusiveMonitor& GetExclusiveMonitor() const; - KAutoObjectWithListContainer& ObjectListContainer(); const KAutoObjectWithListContainer& ObjectListContainer() const; diff --git a/src/core/hle/kernel/message_buffer.h b/src/core/hle/kernel/message_buffer.h index 75b275310..d528a9bb3 100644 --- a/src/core/hle/kernel/message_buffer.h +++ b/src/core/hle/kernel/message_buffer.h @@ -18,13 +18,13 @@ public: static constexpr inline u64 NullTag = 0; public: - enum class ReceiveListCountType : u32 { - None = 0, - ToMessageBuffer = 1, - ToSingleBuffer = 2, + enum ReceiveListCountType : u32 { + ReceiveListCountType_None = 0, + ReceiveListCountType_ToMessageBuffer = 1, + ReceiveListCountType_ToSingleBuffer = 2, - CountOffset = 2, - CountMax = 13, + ReceiveListCountType_CountOffset = 2, + ReceiveListCountType_CountMax = 13, }; private: @@ -591,16 +591,16 @@ public: // Add the size of the receive list. const auto count = hdr.GetReceiveListCount(); switch (count) { - case MessageHeader::ReceiveListCountType::None: + case MessageHeader::ReceiveListCountType_None: break; - case MessageHeader::ReceiveListCountType::ToMessageBuffer: + case MessageHeader::ReceiveListCountType_ToMessageBuffer: break; - case MessageHeader::ReceiveListCountType::ToSingleBuffer: + case MessageHeader::ReceiveListCountType_ToSingleBuffer: msg_size += ReceiveListEntry::GetDataSize(); break; default: msg_size += (static_cast<s32>(count) - - static_cast<s32>(MessageHeader::ReceiveListCountType::CountOffset)) * + static_cast<s32>(MessageHeader::ReceiveListCountType_CountOffset)) * ReceiveListEntry::GetDataSize(); break; } diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp index ada998772..231e4d0e1 100644 --- a/src/core/hle/kernel/svc/svc_info.cpp +++ b/src/core/hle/kernel/svc/svc_info.cpp @@ -118,7 +118,6 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle R_SUCCEED(); case InfoType::IsApplication: - LOG_WARNING(Kernel_SVC, "(STUBBED) Assuming process is application"); *result = process->IsApplication(); R_SUCCEED(); diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp index 47a3e7bb0..85cc4f561 100644 --- a/src/core/hle/kernel/svc/svc_ipc.cpp +++ b/src/core/hle/kernel/svc/svc_ipc.cpp @@ -48,8 +48,7 @@ Result ReplyAndReceiveImpl(KernelCore& kernel, int32_t* out_index, uintptr_t mes }; // Send the reply. - R_TRY(session->SendReply()); - // R_TRY(session->SendReply(message, buffer_size, message_paddr)); + R_TRY(session->SendReply(message, buffer_size, message_paddr)); } // Receive a message. @@ -85,8 +84,7 @@ Result ReplyAndReceiveImpl(KernelCore& kernel, int32_t* out_index, uintptr_t mes if (R_SUCCEEDED(result)) { KServerSession* session = objs[index]->DynamicCast<KServerSession*>(); if (session != nullptr) { - // result = session->ReceiveRequest(message, buffer_size, message_paddr); - result = session->ReceiveRequest(); + result = session->ReceiveRequest(message, buffer_size, message_paddr); if (ResultNotFound == result) { continue; } diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h index e1ad78607..38e71d516 100644 --- a/src/core/hle/kernel/svc_results.h +++ b/src/core/hle/kernel/svc_results.h @@ -38,7 +38,9 @@ constexpr Result ResultInvalidState{ErrorModule::Kernel, 125}; constexpr Result ResultReservedUsed{ErrorModule::Kernel, 126}; constexpr Result ResultPortClosed{ErrorModule::Kernel, 131}; constexpr Result ResultLimitReached{ErrorModule::Kernel, 132}; +constexpr Result ResultReceiveListBroken{ErrorModule::Kernel, 258}; constexpr Result ResultOutOfAddressSpace{ErrorModule::Kernel, 259}; +constexpr Result ResultMessageTooLarge{ErrorModule::Kernel, 260}; constexpr Result ResultInvalidId{ErrorModule::Kernel, 519}; } // namespace Kernel diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp index fe2ed8df8..31da86074 100644 --- a/src/core/hle/service/fatal/fatal.cpp +++ b/src/core/hle/service/fatal/fatal.cpp @@ -89,7 +89,7 @@ static void GenerateErrorReport(Core::System& system, Result error_code, const F crash_report += fmt::format(" ESR: {:016x}\n", info.esr); crash_report += fmt::format(" FAR: {:016x}\n", info.far); crash_report += "\nBacktrace:\n"; - for (size_t i = 0; i < info.backtrace_size; i++) { + for (u32 i = 0; i < std::min<u32>(info.backtrace_size, 32); i++) { crash_report += fmt::format(" Backtrace[{:02d}]: {:016x}\n", i, info.backtrace[i]); } diff --git a/src/core/hle/service/hid/hid_server.cpp b/src/core/hle/service/hid/hid_server.cpp index de24b0401..06a01c02c 100644 --- a/src/core/hle/service/hid/hid_server.cpp +++ b/src/core/hle/service/hid/hid_server.cpp @@ -51,7 +51,7 @@ private: IPC::RequestParser rp{ctx}; const auto vibration_device_handle{rp.PopRaw<Core::HID::VibrationDeviceHandle>()}; - if (resource_manager != nullptr) { + if (resource_manager != nullptr && resource_manager->GetNpad()) { resource_manager->GetNpad()->InitializeVibrationDevice(vibration_device_handle); } diff --git a/src/core/hle/service/ipc_helpers.h b/src/core/hle/service/ipc_helpers.h index 0e222362e..4b02872fb 100644 --- a/src/core/hle/service/ipc_helpers.h +++ b/src/core/hle/service/ipc_helpers.h @@ -151,8 +151,8 @@ public: if (manager->IsDomain()) { context->AddDomainObject(std::move(iface)); } else { - kernel.ApplicationProcess()->GetResourceLimit()->Reserve( - Kernel::LimitableResource::SessionCountMax, 1); + ASSERT(Kernel::GetCurrentProcess(kernel).GetResourceLimit()->Reserve( + Kernel::LimitableResource::SessionCountMax, 1)); auto* session = Kernel::KSession::Create(kernel); session->Initialize(nullptr, 0); diff --git a/src/core/hle/service/server_manager.cpp b/src/core/hle/service/server_manager.cpp index 6808247a9..15edb23e0 100644 --- a/src/core/hle/service/server_manager.cpp +++ b/src/core/hle/service/server_manager.cpp @@ -47,7 +47,7 @@ ServerManager::~ServerManager() { m_stopped.Wait(); m_threads.clear(); - // Clean up ports. + // Clean up server ports. for (const auto& [port, handler] : m_ports) { port->Close(); } @@ -97,22 +97,15 @@ Result ServerManager::RegisterNamedService(const std::string& service_name, u32 max_sessions) { ASSERT(m_sessions.size() + m_ports.size() < MaximumWaitObjects); - // Add the new server to sm:. - ASSERT(R_SUCCEEDED( - m_system.ServiceManager().RegisterService(service_name, max_sessions, handler_factory))); - - // Get the registered port. - Kernel::KPort* port{}; - ASSERT( - R_SUCCEEDED(m_system.ServiceManager().GetServicePort(std::addressof(port), service_name))); - - // Open a new reference to the server port. - port->GetServerPort().Open(); + // Add the new server to sm: and get the moved server port. + Kernel::KServerPort* server_port{}; + R_ASSERT(m_system.ServiceManager().RegisterService(std::addressof(server_port), service_name, + max_sessions, handler_factory)); // Begin tracking the server port. { std::scoped_lock ll{m_list_mutex}; - m_ports.emplace(std::addressof(port->GetServerPort()), std::move(handler_factory)); + m_ports.emplace(server_port, std::move(handler_factory)); } // Signal the wakeup event. @@ -372,7 +365,7 @@ Result ServerManager::OnSessionEvent(Kernel::KServerSession* session, // Try to receive a message. std::shared_ptr<HLERequestContext> context; - rc = session->ReceiveRequest(&context, manager); + rc = session->ReceiveRequestHLE(&context, manager); // If the session has been closed, we're done. if (rc == Kernel::ResultSessionClosed) { diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp index 0653779d5..8e637f963 100644 --- a/src/core/hle/service/set/set_sys.cpp +++ b/src/core/hle/service/set/set_sys.cpp @@ -507,6 +507,14 @@ void SET_SYS::SetTvSettings(HLERequestContext& ctx) { rb.Push(ResultSuccess); } +void SET_SYS::GetDebugModeFlag(HLERequestContext& ctx) { + LOG_DEBUG(Service_SET, "called"); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(ResultSuccess); + rb.Push<u32>(0); +} + void SET_SYS::GetQuestFlag(HLERequestContext& ctx) { LOG_WARNING(Service_SET, "(STUBBED) called"); @@ -926,7 +934,7 @@ SET_SYS::SET_SYS(Core::System& system_) : ServiceFramework{system_, "set:sys"}, {59, &SET_SYS::SetNetworkSystemClockContext, "SetNetworkSystemClockContext"}, {60, &SET_SYS::IsUserSystemClockAutomaticCorrectionEnabled, "IsUserSystemClockAutomaticCorrectionEnabled"}, {61, &SET_SYS::SetUserSystemClockAutomaticCorrectionEnabled, "SetUserSystemClockAutomaticCorrectionEnabled"}, - {62, nullptr, "GetDebugModeFlag"}, + {62, &SET_SYS::GetDebugModeFlag, "GetDebugModeFlag"}, {63, &SET_SYS::GetPrimaryAlbumStorage, "GetPrimaryAlbumStorage"}, {64, nullptr, "SetPrimaryAlbumStorage"}, {65, nullptr, "GetUsb30EnableFlag"}, @@ -1143,6 +1151,8 @@ void SET_SYS::StoreSettings() { } void SET_SYS::StoreSettingsThreadFunc(std::stop_token stop_token) { + Common::SetCurrentThreadName("SettingsStore"); + while (Common::StoppableTimedWait(stop_token, std::chrono::minutes(1))) { std::scoped_lock l{m_save_needed_mutex}; if (!std::exchange(m_save_needed, false)) { diff --git a/src/core/hle/service/set/set_sys.h b/src/core/hle/service/set/set_sys.h index 3785d93d8..853f76fce 100644 --- a/src/core/hle/service/set/set_sys.h +++ b/src/core/hle/service/set/set_sys.h @@ -98,6 +98,7 @@ private: void GetSettingsItemValue(HLERequestContext& ctx); void GetTvSettings(HLERequestContext& ctx); void SetTvSettings(HLERequestContext& ctx); + void GetDebugModeFlag(HLERequestContext& ctx); void GetQuestFlag(HLERequestContext& ctx); void GetDeviceTimeZoneLocationName(HLERequestContext& ctx); void SetDeviceTimeZoneLocationName(HLERequestContext& ctx); diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index 296ee6e89..1095dcf6c 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp @@ -29,8 +29,7 @@ ServiceManager::ServiceManager(Kernel::KernelCore& kernel_) : kernel{kernel_} { ServiceManager::~ServiceManager() { for (auto& [name, port] : service_ports) { - port->GetClientPort().Close(); - port->GetServerPort().Close(); + port->Close(); } if (deferral_event) { @@ -50,8 +49,8 @@ static Result ValidateServiceName(const std::string& name) { return ResultSuccess; } -Result ServiceManager::RegisterService(std::string name, u32 max_sessions, - SessionRequestHandlerFactory handler) { +Result ServiceManager::RegisterService(Kernel::KServerPort** out_server_port, std::string name, + u32 max_sessions, SessionRequestHandlerFactory handler) { R_TRY(ValidateServiceName(name)); std::scoped_lock lk{lock}; @@ -66,13 +65,17 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions, // Register the port. Kernel::KPort::Register(kernel, port); - service_ports.emplace(name, port); + service_ports.emplace(name, std::addressof(port->GetClientPort())); registered_services.emplace(name, handler); if (deferral_event) { deferral_event->Signal(); } - return ResultSuccess; + // Set our output. + *out_server_port = std::addressof(port->GetServerPort()); + + // We succeeded. + R_SUCCEED(); } Result ServiceManager::UnregisterService(const std::string& name) { @@ -91,7 +94,8 @@ Result ServiceManager::UnregisterService(const std::string& name) { return ResultSuccess; } -Result ServiceManager::GetServicePort(Kernel::KPort** out_port, const std::string& name) { +Result ServiceManager::GetServicePort(Kernel::KClientPort** out_client_port, + const std::string& name) { R_TRY(ValidateServiceName(name)); std::scoped_lock lk{lock}; @@ -101,7 +105,7 @@ Result ServiceManager::GetServicePort(Kernel::KPort** out_port, const std::strin return Service::SM::ResultNotRegistered; } - *out_port = it->second; + *out_client_port = it->second; return ResultSuccess; } @@ -172,8 +176,8 @@ Result SM::GetServiceImpl(Kernel::KClientSession** out_client_session, HLEReques std::string name(PopServiceName(rp)); // Find the named port. - Kernel::KPort* port{}; - auto port_result = service_manager.GetServicePort(&port, name); + Kernel::KClientPort* client_port{}; + auto port_result = service_manager.GetServicePort(&client_port, name); if (port_result == Service::SM::ResultInvalidServiceName) { LOG_ERROR(Service_SM, "Invalid service name '{}'", name); return Service::SM::ResultInvalidServiceName; @@ -187,7 +191,7 @@ Result SM::GetServiceImpl(Kernel::KClientSession** out_client_session, HLEReques // Create a new session. Kernel::KClientSession* session{}; - if (const auto result = port->GetClientPort().CreateSession(&session); result.IsError()) { + if (const auto result = client_port->CreateSession(&session); result.IsError()) { LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, result.raw); return result; } @@ -221,7 +225,9 @@ void SM::RegisterServiceImpl(HLERequestContext& ctx, std::string name, u32 max_s LOG_DEBUG(Service_SM, "called with name={}, max_session_count={}, is_light={}", name, max_session_count, is_light); - if (const auto result = service_manager.RegisterService(name, max_session_count, nullptr); + Kernel::KServerPort* server_port{}; + if (const auto result = service_manager.RegisterService(std::addressof(server_port), name, + max_session_count, nullptr); result.IsError()) { LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}", result.raw); IPC::ResponseBuilder rb{ctx, 2}; @@ -229,13 +235,9 @@ void SM::RegisterServiceImpl(HLERequestContext& ctx, std::string name, u32 max_s return; } - auto* port = Kernel::KPort::Create(kernel); - port->Initialize(ServerSessionCountMax, is_light, 0); - SCOPE_EXIT({ port->GetClientPort().Close(); }); - IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; rb.Push(ResultSuccess); - rb.PushMoveObjects(port->GetServerPort()); + rb.PushMoveObjects(server_port); } void SM::UnregisterService(HLERequestContext& ctx) { diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h index ff74f588a..4ae32a9c1 100644 --- a/src/core/hle/service/sm/sm.h +++ b/src/core/hle/service/sm/sm.h @@ -56,10 +56,10 @@ public: explicit ServiceManager(Kernel::KernelCore& kernel_); ~ServiceManager(); - Result RegisterService(std::string name, u32 max_sessions, - SessionRequestHandlerFactory handler_factory); + Result RegisterService(Kernel::KServerPort** out_server_port, std::string name, + u32 max_sessions, SessionRequestHandlerFactory handler_factory); Result UnregisterService(const std::string& name); - Result GetServicePort(Kernel::KPort** out_port, const std::string& name); + Result GetServicePort(Kernel::KClientPort** out_client_port, const std::string& name); template <Common::DerivedFrom<SessionRequestHandler> T> std::shared_ptr<T> GetService(const std::string& service_name) const { @@ -84,7 +84,7 @@ private: /// Map of registered services, retrieved using GetServicePort. std::mutex lock; std::unordered_map<std::string, SessionRequestHandlerFactory> registered_services; - std::unordered_map<std::string, Kernel::KPort*> service_ports; + std::unordered_map<std::string, Kernel::KClientPort*> service_ports; /// Kernel context Kernel::KernelCore& kernel; diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp index 7dce28fe0..7f0fb91d0 100644 --- a/src/core/hle/service/sm/sm_controller.cpp +++ b/src/core/hle/service/sm/sm_controller.cpp @@ -28,7 +28,6 @@ void Controller::ConvertCurrentObjectToDomain(HLERequestContext& ctx) { void Controller::CloneCurrentObject(HLERequestContext& ctx) { LOG_DEBUG(Service, "called"); - auto& process = *ctx.GetThread().GetOwnerProcess(); auto session_manager = ctx.GetManager(); // FIXME: this is duplicated from the SVC, it should just call it instead @@ -36,11 +35,11 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) { // Reserve a new session from the process resource limit. Kernel::KScopedResourceReservation session_reservation( - &process, Kernel::LimitableResource::SessionCountMax); + Kernel::GetCurrentProcessPointer(kernel), Kernel::LimitableResource::SessionCountMax); ASSERT(session_reservation.Succeeded()); // Create the session. - Kernel::KSession* session = Kernel::KSession::Create(system.Kernel()); + Kernel::KSession* session = Kernel::KSession::Create(kernel); ASSERT(session != nullptr); // Initialize the session. @@ -50,7 +49,7 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) { session_reservation.Commit(); // Register the session. - Kernel::KSession::Register(system.Kernel(), session); + Kernel::KSession::Register(kernel, session); // Register with server manager. session_manager->GetServerManager().RegisterSession(&session->GetServerSession(), diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 60ee78e89..c9f8707b7 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -129,9 +129,10 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect } metadata.Print(); - // Enable NCE only for programs with 39-bit address space. + // Enable NCE only for applications with 39-bit address space. const bool is_39bit = metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit; + const bool is_application = metadata.GetPoolPartition() == FileSys::PoolPartition::Application; Settings::SetNceEnabled(is_39bit); const std::array static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", @@ -147,7 +148,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* { #ifdef HAS_NCE - if (Settings::IsNceEnabled()) { + if (is_application && Settings::IsNceEnabled()) { return &module_patchers[i]; } #endif @@ -175,7 +176,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect // Enable direct memory mapping in case of NCE. const u64 fastmem_base = [&]() -> size_t { - if (Settings::IsNceEnabled()) { + if (is_application && Settings::IsNceEnabled()) { auto& buffer = system.DeviceMemory().buffer; buffer.EnableDirectMappedAddress(); return reinterpret_cast<u64>(buffer.VirtualBasePointer()); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 169bf4c8c..c7eb32c19 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -45,7 +45,13 @@ struct Memory::Impl { void SetCurrentPageTable(Kernel::KProcess& process) { current_page_table = &process.GetPageTable().GetImpl(); - current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); + + if (std::addressof(process) == system.ApplicationProcess() && + Settings::IsFastmemEnabled()) { + current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); + } else { + current_page_table->fastmem_arena = nullptr; + } } void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size, @@ -57,7 +63,7 @@ struct Memory::Impl { MapPages(page_table, base / YUZU_PAGESIZE, size / YUZU_PAGESIZE, target, Common::PageType::Memory); - if (Settings::IsFastmemEnabled()) { + if (current_page_table->fastmem_arena) { system.DeviceMemory().buffer.Map(GetInteger(base), GetInteger(target) - DramMemoryMap::Base, size, perms); } @@ -69,7 +75,7 @@ struct Memory::Impl { MapPages(page_table, base / YUZU_PAGESIZE, size / YUZU_PAGESIZE, 0, Common::PageType::Unmapped); - if (Settings::IsFastmemEnabled()) { + if (current_page_table->fastmem_arena) { system.DeviceMemory().buffer.Unmap(GetInteger(base), size); } } @@ -79,7 +85,7 @@ struct Memory::Impl { ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((vaddr & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", vaddr); - if (!Settings::IsFastmemEnabled()) { + if (!current_page_table->fastmem_arena) { return; } @@ -88,11 +94,6 @@ struct Memory::Impl { const bool is_x = True(perms & Common::MemoryPermission::Execute) && Settings::IsNceEnabled(); - if (!current_page_table) { - system.DeviceMemory().buffer.Protect(vaddr, size, is_r, is_w, is_x); - return; - } - u64 protect_bytes{}; u64 protect_begin{}; for (u64 addr = vaddr; addr < vaddr + size; addr += YUZU_PAGESIZE) { @@ -239,7 +240,7 @@ struct Memory::Impl { bool WalkBlock(const Common::ProcessAddress addr, const std::size_t size, auto on_unmapped, auto on_memory, auto on_rasterizer, auto increment) { - const auto& page_table = system.ApplicationProcess()->GetPageTable().GetImpl(); + const auto& page_table = *current_page_table; std::size_t remaining_size = size; std::size_t page_index = addr >> YUZU_PAGEBITS; std::size_t page_offset = addr & YUZU_PAGEMASK; @@ -484,7 +485,7 @@ struct Memory::Impl { return; } - if (Settings::IsFastmemEnabled()) { + if (current_page_table->fastmem_arena) { system.DeviceMemory().buffer.Protect(vaddr, size, !debug, !debug); } @@ -541,7 +542,7 @@ struct Memory::Impl { return; } - if (Settings::IsFastmemEnabled()) { + if (current_page_table->fastmem_arena) { const bool is_read_enable = !Settings::values.use_reactive_flushing.GetValue() || !cached; system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); @@ -886,8 +887,7 @@ void Memory::ProtectRegion(Common::PageTable& page_table, Common::ProcessAddress } bool Memory::IsValidVirtualAddress(const Common::ProcessAddress vaddr) const { - const Kernel::KProcess& process = *system.ApplicationProcess(); - const auto& page_table = process.GetPageTable().GetImpl(); + const auto& page_table = *impl->current_page_table; const size_t page = vaddr >> YUZU_PAGEBITS; if (page >= page_table.pointers.size()) { return false; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e5a78a914..feca5105f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -74,6 +74,11 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::ClipDistance7: { const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; const u32 index{static_cast<u32>(attr) - base}; + if (index >= ctx.profile.max_user_clip_distances) { + LOG_WARNING(Shader, "Ignoring clip distance store {} >= {} supported", index, + ctx.profile.max_user_clip_distances); + return std::nullopt; + } const Id clip_num{ctx.Const(index)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 22ceca19c..800754554 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -214,16 +214,16 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind } } -Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { +std::pair<Id, bool> Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { if (!index.IsImmediate() || index.U32() != 0) { throw NotImplementedException("Indirect image indexing"); } if (info.type == TextureType::Buffer) { const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)}; - return ctx.OpLoad(def.image_type, def.id); + return {ctx.OpLoad(def.image_type, def.id), def.is_integer}; } else { const ImageDefinition def{ctx.images.at(info.descriptor_index)}; - return ctx.OpLoad(def.image_type, def.id); + return {ctx.OpLoad(def.image_type, def.id), def.is_integer}; } } @@ -566,13 +566,23 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); return ctx.ConstantNull(ctx.U32[4]); } - return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], - Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{}); + const auto [image, is_integer] = Image(ctx, index, info); + const Id result_type{is_integer ? ctx.U32[4] : ctx.F32[4]}; + Id color{Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, + result_type, image, coords, std::nullopt, std::span<const Id>{})}; + if (!is_integer) { + color = ctx.OpBitcast(ctx.U32[4], color); + } + return color; } void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { const auto info{inst->Flags<IR::TextureInstInfo>()}; - ctx.OpImageWrite(Image(ctx, index, info), coords, color); + const auto [image, is_integer] = Image(ctx, index, info); + if (!is_integer) { + color = ctx.OpBitcast(ctx.F32[4], color); + } + ctx.OpImageWrite(image, coords, color); } Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 3350f1f85..ed023fcfe 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -74,20 +74,19 @@ spv::ImageFormat GetImageFormat(ImageFormat format) { throw InvalidArgument("Invalid image format {}", format); } -Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { +Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) { const spv::ImageFormat format{GetImageFormat(desc.format)}; - const Id type{ctx.U32[1]}; switch (desc.type) { case TextureType::Color1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); + return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format); case TextureType::ColorArray1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format); + return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format); case TextureType::Color2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format); + return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format); case TextureType::ColorArray2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format); + return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, 2, format); case TextureType::Color3D: - return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format); + return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, 2, format); case TextureType::Buffer: throw NotImplementedException("Image buffer"); default: @@ -1273,7 +1272,9 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { throw NotImplementedException("Array of image buffers"); } const spv::ImageFormat format{GetImageFormat(desc.format)}; - const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; + const Id sampled_type{desc.is_integer ? U32[1] : F32[1]}; + const Id image_type{ + TypeImage(sampled_type, spv::Dim::Buffer, false, false, false, 2, format)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); @@ -1283,6 +1284,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { .id = id, .image_type = image_type, .count = desc.count, + .is_integer = desc.is_integer, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1327,7 +1329,8 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde if (desc.count != 1) { throw NotImplementedException("Array of images"); } - const Id image_type{ImageType(*this, desc)}; + const Id sampled_type{desc.is_integer ? U32[1] : F32[1]}; + const Id image_type{ImageType(*this, desc, sampled_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); @@ -1337,6 +1340,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde .id = id, .image_type = image_type, .count = desc.count, + .is_integer = desc.is_integer, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1528,7 +1532,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } - const Id type{TypeArray(F32[1], Const(8U))}; + const Id type{TypeArray( + F32[1], Const(std::min(info.used_clip_distances, profile.max_user_clip_distances)))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } if (info.stores[IR::Attribute::Layer] && diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 1aa79863d..56019ad89 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -47,12 +47,14 @@ struct ImageBufferDefinition { Id id; Id image_type; u32 count; + bool is_integer; }; struct ImageDefinition { Id id; Id image_type; u32 count; + bool is_integer; }; struct UniformDefinitions { diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 15285ab0a..e30bf094a 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -24,6 +24,8 @@ public: [[nodiscard]] virtual TexturePixelFormat ReadTexturePixelFormat(u32 raw_handle) = 0; + [[nodiscard]] virtual bool IsTexturePixelFormatInteger(u32 raw_handle) = 0; + [[nodiscard]] virtual u32 ReadViewportTransformState() = 0; [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 70292686f..cb82a326c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -913,7 +913,11 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } for (size_t index = 0; index < 8; ++index) { const u16 mask{header.vtg.omap_systemc.clip_distances}; - info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + const bool used{((mask >> index) & 1) != 0}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, used); + if (used) { + info.used_clip_distances = static_cast<u32>(index) + 1; + } } info.stores.Set(IR::Attribute::PrimitiveId, header.vtg.omap_systemb.primitive_array_id != 0); diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ec12c843a..e4a73a360 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -815,6 +815,15 @@ bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coor return true; } +void ConvertDerivatives(std::array<IR::Value, 3>& results, IR::IREmitter& ir) { + for (size_t i = 0; i < 3; i++) { + if (results[i].Type() == IR::Type::U32) { + results[i] = results[i].IsImmediate() ? ir.Imm32(Common::BitCast<f32>(results[i].U32())) + : ir.BitCast<IR::F32>(IR::U32(results[i])); + } + } +} + void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>(); auto orig_opcode = inst.GetOpcode(); @@ -831,12 +840,14 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { if (!offset.IsImmediate()) { return; } + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::Inst* const inst2 = coords.InstRecursive(); std::array<std::array<IR::Value, 3>, 3> results_matrix; for (size_t i = 0; i < 3; i++) { if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) { return; } + ConvertDerivatives(results_matrix[i], ir); } IR::F32 lod_clamp{}; if (info.has_lod_clamp != 0) { @@ -846,7 +857,6 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { lod_clamp = IR::F32{bias_lc}; } } - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::Value new_coords = ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]); IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index d374c976a..100437f0e 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -372,6 +372,10 @@ TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAdd return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf)); } +bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) { + return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf)); +} + class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, @@ -403,6 +407,7 @@ public: })}; image_buffer_descriptors[index].is_written |= desc.is_written; image_buffer_descriptors[index].is_read |= desc.is_read; + image_buffer_descriptors[index].is_integer |= desc.is_integer; return index; } @@ -432,6 +437,7 @@ public: })}; image_descriptors[index].is_written |= desc.is_written; image_descriptors[index].is_read |= desc.is_read; + image_descriptors[index].is_integer |= desc.is_integer; return index; } @@ -469,6 +475,20 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { ir.FPRecip(ir.ConvertUToF(32, 32, ir.CompositeExtract(texture_size, 1)))))); } +bool IsPixelFormatSNorm(TexturePixelFormat pixel_format) { + switch (pixel_format) { + case TexturePixelFormat::A8B8G8R8_SNORM: + case TexturePixelFormat::R8G8_SNORM: + case TexturePixelFormat::R8_SNORM: + case TexturePixelFormat::R16G16B16A16_SNORM: + case TexturePixelFormat::R16G16_SNORM: + case TexturePixelFormat::R16_SNORM: + return true; + default: + return false; + } +} + void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_format) { const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -587,11 +607,13 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; + const bool is_integer{IsTexturePixelFormatInteger(env, cbuf)}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, .is_written = is_written, .is_read = is_read, + .is_integer = is_integer, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -603,6 +625,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo .format = flags.image_format, .is_written = is_written, .is_read = is_read, + .is_integer = is_integer, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -658,7 +681,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo if (!host_info.support_snorm_render_buffer && inst->GetOpcode() == IR::Opcode::ImageFetch && flags.type == TextureType::Buffer) { const auto pixel_format = ReadTexturePixelFormat(env, cbuf); - if (pixel_format != TexturePixelFormat::OTHER) { + if (IsPixelFormatSNorm(pixel_format)) { PatchTexelFetch(*texture_inst.block, *texture_inst.inst, pixel_format); } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 66901a965..7578d41cc 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -87,6 +87,8 @@ struct Profile { bool has_broken_robust{}; u64 min_ssbo_alignment{}; + + u32 max_user_clip_distances{}; }; } // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index b4b4afd37..ed13e6820 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -35,14 +35,109 @@ enum class TextureType : u32 { }; constexpr u32 NUM_TEXTURE_TYPES = 9; -enum class TexturePixelFormat : u32 { +enum class TexturePixelFormat { + A8B8G8R8_UNORM, A8B8G8R8_SNORM, + A8B8G8R8_SINT, + A8B8G8R8_UINT, + R5G6B5_UNORM, + B5G6R5_UNORM, + A1R5G5B5_UNORM, + A2B10G10R10_UNORM, + A2B10G10R10_UINT, + A2R10G10B10_UNORM, + A1B5G5R5_UNORM, + A5B5G5R1_UNORM, + R8_UNORM, R8_SNORM, - R8G8_SNORM, + R8_SINT, + R8_UINT, + R16G16B16A16_FLOAT, + R16G16B16A16_UNORM, R16G16B16A16_SNORM, - R16G16_SNORM, + R16G16B16A16_SINT, + R16G16B16A16_UINT, + B10G11R11_FLOAT, + R32G32B32A32_UINT, + BC1_RGBA_UNORM, + BC2_UNORM, + BC3_UNORM, + BC4_UNORM, + BC4_SNORM, + BC5_UNORM, + BC5_SNORM, + BC7_UNORM, + BC6H_UFLOAT, + BC6H_SFLOAT, + ASTC_2D_4X4_UNORM, + B8G8R8A8_UNORM, + R32G32B32A32_FLOAT, + R32G32B32A32_SINT, + R32G32_FLOAT, + R32G32_SINT, + R32_FLOAT, + R16_FLOAT, + R16_UNORM, R16_SNORM, - OTHER + R16_UINT, + R16_SINT, + R16G16_UNORM, + R16G16_FLOAT, + R16G16_UINT, + R16G16_SINT, + R16G16_SNORM, + R32G32B32_FLOAT, + A8B8G8R8_SRGB, + R8G8_UNORM, + R8G8_SNORM, + R8G8_SINT, + R8G8_UINT, + R32G32_UINT, + R16G16B16X16_FLOAT, + R32_UINT, + R32_SINT, + ASTC_2D_8X8_UNORM, + ASTC_2D_8X5_UNORM, + ASTC_2D_5X4_UNORM, + B8G8R8A8_SRGB, + BC1_RGBA_SRGB, + BC2_SRGB, + BC3_SRGB, + BC7_SRGB, + A4B4G4R4_UNORM, + G4R4_UNORM, + ASTC_2D_4X4_SRGB, + ASTC_2D_8X8_SRGB, + ASTC_2D_8X5_SRGB, + ASTC_2D_5X4_SRGB, + ASTC_2D_5X5_UNORM, + ASTC_2D_5X5_SRGB, + ASTC_2D_10X8_UNORM, + ASTC_2D_10X8_SRGB, + ASTC_2D_6X6_UNORM, + ASTC_2D_6X6_SRGB, + ASTC_2D_10X6_UNORM, + ASTC_2D_10X6_SRGB, + ASTC_2D_10X5_UNORM, + ASTC_2D_10X5_SRGB, + ASTC_2D_10X10_UNORM, + ASTC_2D_10X10_SRGB, + ASTC_2D_12X10_UNORM, + ASTC_2D_12X10_SRGB, + ASTC_2D_12X12_UNORM, + ASTC_2D_12X12_SRGB, + ASTC_2D_8X6_UNORM, + ASTC_2D_8X6_SRGB, + ASTC_2D_6X5_UNORM, + ASTC_2D_6X5_SRGB, + E5B9G9R9_FLOAT, + D32_FLOAT, + D16_UNORM, + X8_D24_UNORM, + S8_UINT, + D24_UNORM_S8_UINT, + S8_UINT_D24_UNORM, + D32_FLOAT_S8_UINT, }; enum class ImageFormat : u32 { @@ -97,6 +192,7 @@ struct ImageBufferDescriptor { ImageFormat format; bool is_written; bool is_read; + bool is_integer; u32 cbuf_index; u32 cbuf_offset; u32 count; @@ -129,6 +225,7 @@ struct ImageDescriptor { ImageFormat format; bool is_written; bool is_read; + bool is_integer; u32 cbuf_index; u32 cbuf_offset; u32 count; @@ -227,6 +324,8 @@ struct Info { bool requires_layer_emulation{}; IR::Attribute emulated_layer{}; + u32 used_clip_distances{}; + boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> constant_buffer_descriptors; boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 592c28ba3..95ba4f76c 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() { } void Maxwell3D::ProcessCounterReset() { - switch (regs.clear_report_value) { - case Regs::ClearReport::ZPassPixelCount: - rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); - break; - default: - LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); - break; - } + const auto query_type = [clear_report = regs.clear_report_value]() { + switch (clear_report) { + case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount: + return VideoCommon::QueryType::ZPassPixelCount64; + case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded: + return VideoCommon::QueryType::StreamingPrimitivesSucceeded; + case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated: + return VideoCommon::QueryType::PrimitivesGenerated; + case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut: + return VideoCommon::QueryType::VtgPrimitivesOut; + default: + LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report); + return VideoCommon::QueryType::Payload; + } + }(); + rasterizer->ResetCounter(query_type); } void Maxwell3D::ProcessSyncPoint() { diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 9fcaeeac7..a64404ce4 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -28,8 +28,11 @@ namespace VideoCore { enum class QueryType { SamplesPassed, + PrimitivesGenerated, + TfbPrimitivesWritten, + Count, }; -constexpr std::size_t NumQueryTypes = 1; +constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count); } // namespace VideoCore namespace VideoCommon { @@ -44,15 +47,6 @@ public: explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) : cache{cache_}, type{type_} {} - /// Updates the state of the stream, enabling or disabling as needed. - void Update(bool enabled) { - if (enabled) { - Enable(); - } else { - Disable(); - } - } - /// Resets the stream to zero. It doesn't disable the query after resetting. void Reset() { if (current) { @@ -80,7 +74,6 @@ public: return current != nullptr; } -private: /// Enables the stream. void Enable() { if (current) { @@ -97,6 +90,7 @@ private: last = std::exchange(current, nullptr); } +private: QueryCache& cache; const VideoCore::QueryType type; @@ -112,8 +106,14 @@ public: : rasterizer{rasterizer_}, // Use reinterpret_cast instead of static_cast as workaround for // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) - cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this), - VideoCore::QueryType::SamplesPassed}}} { + cpu_memory{cpu_memory_}, streams{{ + {CounterStream{reinterpret_cast<QueryCache&>(*this), + VideoCore::QueryType::SamplesPassed}}, + {CounterStream{reinterpret_cast<QueryCache&>(*this), + VideoCore::QueryType::PrimitivesGenerated}}, + {CounterStream{reinterpret_cast<QueryCache&>(*this), + VideoCore::QueryType::TfbPrimitivesWritten}}, + }} { (void)slot_async_jobs.insert(); // Null value } @@ -157,12 +157,11 @@ public: AsyncFlushQuery(query, timestamp, lock); } - /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. - void UpdateCounters() { + /// Enables all available GPU counters + void EnableCounters() { std::unique_lock lock{mutex}; - if (maxwell3d) { - const auto& regs = maxwell3d->regs; - Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable); + for (auto& stream : streams) { + stream.Enable(); } } @@ -176,7 +175,7 @@ public: void DisableStreams() { std::unique_lock lock{mutex}; for (auto& stream : streams) { - stream.Update(false); + stream.Disable(); } } @@ -353,7 +352,7 @@ private: std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; -}; +}; // namespace VideoCommon template <class QueryCache, class HostCounter> class HostCounterBase { diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index ec142d48e..fef7360ed 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -18,16 +18,27 @@ namespace OpenGL { namespace { -constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; - constexpr GLenum GetTarget(VideoCore::QueryType type) { - return QueryTargets[static_cast<std::size_t>(type)]; + switch (type) { + case VideoCore::QueryType::SamplesPassed: + return GL_SAMPLES_PASSED; + case VideoCore::QueryType::PrimitivesGenerated: + return GL_PRIMITIVES_GENERATED; + case VideoCore::QueryType::TfbPrimitivesWritten: + return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN; + default: + break; + } + UNIMPLEMENTED_MSG("Query type {}", type); + return 0; } } // Anonymous namespace QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) - : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} + : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { + EnableCounters(); +} QueryCache::~QueryCache() = default; @@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) { auto& stream = cache->Stream(type); const bool slice_counter = WaitPending() && stream.IsEnabled(); if (slice_counter) { - stream.Update(false); + stream.Disable(); } auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush(); if (slice_counter) { - stream.Update(true); + stream.Enable(); } return result; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 291515e73..7a5fad735 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } + +std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) { + switch (type) { + case VideoCommon::QueryType::PrimitivesGenerated: + case VideoCommon::QueryType::VtgPrimitivesOut: + return VideoCore::QueryType::PrimitivesGenerated; + case VideoCommon::QueryType::ZPassPixelCount64: + return VideoCore::QueryType::SamplesPassed; + case VideoCommon::QueryType::StreamingPrimitivesSucceeded: + // case VideoCommon::QueryType::StreamingByteCount: + // TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride + return VideoCore::QueryType::TfbPrimitivesWritten; + default: + return std::nullopt; + } +} } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -216,7 +232,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { SCOPE_EXIT({ gpu.TickWork(); }); gpu_memory->FlushCaching(); - query_cache.UpdateCounters(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; if (!pipeline) { @@ -341,7 +356,6 @@ void RasterizerOpenGL::DrawTexture() { MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); - query_cache.UpdateCounters(); texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.UpdateRenderTargets(false); @@ -408,21 +422,28 @@ void RasterizerOpenGL::DispatchCompute() { } void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { - if (type == VideoCommon::QueryType::ZPassPixelCount64) { - query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); + const auto query_cache_type = MaxwellToVideoCoreQuery(type); + if (!query_cache_type.has_value()) { + UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type); + return; } + query_cache.ResetCounter(*query_cache_type); } void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { - if (type == VideoCommon::QueryType::ZPassPixelCount64) { - if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { - query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); - } else { - query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt); - } - return; + const auto query_cache_type = MaxwellToVideoCoreQuery(type); + if (!query_cache_type.has_value()) { + return QueryFallback(gpu_addr, type, flags, payload, subreport); } + const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout); + const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt; + query_cache.Query(gpu_addr, *query_cache_type, timestamp); +} + +void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, + u32 subreport) { if (type != VideoCommon::QueryType::Payload) { payload = 1u; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d28388a9d..ce3460938 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -231,6 +231,9 @@ private: /// End a transform feedback void EndTransformFeedback(); + void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); + Tegra::GPU& gpu; const Device& device; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 26f2d0ea7..30df41b7d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; -constexpr u32 CACHE_VERSION = 9; +constexpr u32 CACHE_VERSION = 10; template <typename Container> auto MakeSpan(Container& container) { @@ -233,6 +233,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .ignore_nan_fp_comparisons = true, .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(), + .max_user_clip_distances = 8, }, host_info{ .support_float64 = true, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5958f52f7..3c61799fa 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -78,8 +78,15 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo } } // Anonymous namespace -Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) - : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {} +Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} { + if (runtime.device.HasNullDescriptor()) { + return; + } + device = &runtime.device; + buffer = runtime.CreateNullBuffer(); + is_null = true; +} Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) @@ -93,8 +100,12 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { if (!device) { - // Null buffer, return a null descriptor + // Null buffer supported, return a null descriptor return VK_NULL_HANDLE; + } else if (is_null) { + // Null buffer not supported, adjust offset and size + offset = 0; + size = 0; } const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { return offset == view.offset && size == view.size && format == view.format; @@ -563,22 +574,27 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi } buffer_handles.push_back(handle); } + const u32 device_max = device.GetMaxVertexInputBindings(); + const u32 min_binding = std::min(bindings.min_index, device_max); + const u32 max_binding = std::min(bindings.max_index, device_max); + const u32 binding_count = max_binding - min_binding; + if (binding_count == 0) { + return; + } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([this, bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, - std::min(bindings_.max_index - bindings_.min_index, - device.GetMaxVertexInputBindings()), - buffer_handles_.data(), bindings_.offsets.data(), - bindings_.sizes.data(), bindings_.strides.data()); + scheduler.Record([bindings_ = std::move(bindings), + buffer_handles_ = std::move(buffer_handles), + binding_count](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, binding_count, buffer_handles_.data(), + bindings_.offsets.data(), bindings_.sizes.data(), + bindings_.strides.data()); }); } else { - scheduler.Record([this, bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers(bindings_.min_index, - std::min(bindings_.max_index - bindings_.min_index, - device.GetMaxVertexInputBindings()), - buffer_handles_.data(), bindings_.offsets.data()); + scheduler.Record([bindings_ = std::move(bindings), + buffer_handles_ = std::move(buffer_handles), + binding_count](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers(bindings_.min_index, binding_count, buffer_handles_.data(), + bindings_.offsets.data()); }); } } @@ -622,9 +638,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings< } void BufferCacheRuntime::ReserveNullBuffer() { - if (null_buffer) { - return; + if (!null_buffer) { + null_buffer = CreateNullBuffer(); } +} + +vk::Buffer BufferCacheRuntime::CreateNullBuffer() { VkBufferCreateInfo create_info{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -639,15 +658,17 @@ void BufferCacheRuntime::ReserveNullBuffer() { if (device.IsExtTransformFeedbackSupported()) { create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } - null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal); + vk::Buffer ret = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal); if (device.HasDebuggingToolAttached()) { - null_buffer.SetObjectNameEXT("Null buffer"); + ret.SetObjectNameEXT("Null buffer"); } scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = *ret](vk::CommandBuffer cmdbuf) { cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0); }); + + return ret; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 0b3fbd6d0..dc300d7cb 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -63,6 +63,7 @@ private: vk::Buffer buffer; std::vector<BufferView> views; VideoCommon::UsageTracker tracker; + bool is_null{}; }; class QuadArrayIndexBuffer; @@ -151,6 +152,7 @@ private: } void ReserveNullBuffer(); + vk::Buffer CreateNullBuffer(); const Device& device; MemoryAllocator& memory_allocator; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2a13b2a72..d1841198d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 10; +constexpr u32 CACHE_VERSION = 11; constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'}; template <typename Container> @@ -374,6 +374,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .has_broken_robust = device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, .min_ssbo_alignment = device.GetStorageBufferAlignment(), + .max_user_clip_distances = device.GetMaxUserClipDistances(), }; host_info = Shader::HostTranslateInfo{ diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 078777cdd..95954ade7 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -289,12 +289,15 @@ public: } if (has_multi_queries) { - size_t intermediary_buffer_index = ObtainBuffer<false>(num_slots_used); + const size_t min_accumulation_limit = + std::min(first_accumulation_checkpoint, num_slots_used); + const size_t max_accumulation_limit = + std::max(last_accumulation_checkpoint, num_slots_used); + const size_t intermediary_buffer_index = ObtainBuffer<false>(num_slots_used); resolve_buffers.push_back(intermediary_buffer_index); queries_prefix_scan_pass->Run(*accumulation_buffer, *buffers[intermediary_buffer_index], *buffers[resolve_buffer_index], num_slots_used, - std::min(first_accumulation_checkpoint, num_slots_used), - last_accumulation_checkpoint); + min_accumulation_limit, max_accumulation_limit); } else { scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 59829c88b..241fc34be 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() { } void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { + if (type != VideoCommon::QueryType::ZPassPixelCount64) { + LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type); + return; + } query_cache.CounterReset(type); } diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 4edbe5700..492440ac4 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -62,23 +62,9 @@ static Shader::TextureType ConvertTextureType(const Tegra::Texture::TICEntry& en } static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture::TICEntry& entry) { - switch (PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, entry.b_type, - entry.a_type, entry.srgb_conversion)) { - case VideoCore::Surface::PixelFormat::A8B8G8R8_SNORM: - return Shader::TexturePixelFormat::A8B8G8R8_SNORM; - case VideoCore::Surface::PixelFormat::R8_SNORM: - return Shader::TexturePixelFormat::R8_SNORM; - case VideoCore::Surface::PixelFormat::R8G8_SNORM: - return Shader::TexturePixelFormat::R8G8_SNORM; - case VideoCore::Surface::PixelFormat::R16G16B16A16_SNORM: - return Shader::TexturePixelFormat::R16G16B16A16_SNORM; - case VideoCore::Surface::PixelFormat::R16G16_SNORM: - return Shader::TexturePixelFormat::R16G16_SNORM; - case VideoCore::Surface::PixelFormat::R16_SNORM: - return Shader::TexturePixelFormat::R16_SNORM; - default: - return Shader::TexturePixelFormat::OTHER; - } + return static_cast<Shader::TexturePixelFormat>( + PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, entry.b_type, + entry.a_type, entry.srgb_conversion)); } static std::string_view StageToPrefix(Shader::Stage stage) { @@ -398,6 +384,11 @@ Shader::TexturePixelFormat GraphicsEnvironment::ReadTexturePixelFormat(u32 handl return result; } +bool GraphicsEnvironment::IsTexturePixelFormatInteger(u32 handle) { + return VideoCore::Surface::IsPixelFormatInteger( + static_cast<VideoCore::Surface::PixelFormat>(ReadTexturePixelFormat(handle))); +} + u32 GraphicsEnvironment::ReadViewportTransformState() { const auto& regs{maxwell3d->regs}; viewport_transform_state = regs.viewport_scale_offset_enabled; @@ -448,6 +439,11 @@ Shader::TexturePixelFormat ComputeEnvironment::ReadTexturePixelFormat(u32 handle return result; } +bool ComputeEnvironment::IsTexturePixelFormatInteger(u32 handle) { + return VideoCore::Surface::IsPixelFormatInteger( + static_cast<VideoCore::Surface::PixelFormat>(ReadTexturePixelFormat(handle))); +} + u32 ComputeEnvironment::ReadViewportTransformState() { return viewport_transform_state; } @@ -551,6 +547,11 @@ Shader::TexturePixelFormat FileEnvironment::ReadTexturePixelFormat(u32 handle) { return it->second; } +bool FileEnvironment::IsTexturePixelFormatInteger(u32 handle) { + return VideoCore::Surface::IsPixelFormatInteger( + static_cast<VideoCore::Surface::PixelFormat>(ReadTexturePixelFormat(handle))); +} + u32 FileEnvironment::ReadViewportTransformState() { return viewport_transform_state; } diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index b90f3d44e..6b372e336 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -115,6 +115,8 @@ public: Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + bool IsTexturePixelFormatInteger(u32 handle) override; + u32 ReadViewportTransformState() override; std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override; @@ -139,6 +141,8 @@ public: Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + bool IsTexturePixelFormatInteger(u32 handle) override; + u32 ReadViewportTransformState() override; std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer( @@ -171,6 +175,8 @@ public: [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override; + [[nodiscard]] bool IsTexturePixelFormatInteger(u32 handle) override; + [[nodiscard]] u32 ReadViewportTransformState() override; [[nodiscard]] u32 LocalMemorySize() const override; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 1fda0042d..a6fbca69e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -695,6 +695,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR std::min(properties.properties.limits.maxVertexInputBindings, 16U); } + if (is_turnip) { + LOG_WARNING(Render_Vulkan, "Turnip requires higher-than-reported binding limits"); + properties.properties.limits.maxVertexInputBindings = 32; + } + if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) { LOG_INFO(Render_Vulkan, "Removing extendedDynamicState2 due to missing extendedDynamicState"); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4f3846345..701817086 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -665,6 +665,10 @@ public: return properties.properties.limits.maxViewports; } + u32 GetMaxUserClipDistances() const { + return properties.properties.limits.maxClipDistances; + } + bool SupportsConditionalBarriers() const { return supports_conditional_barriers; } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 70cf14afa..2f78b8af0 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -377,6 +377,8 @@ const char* ToString(VkResult result) noexcept { return "VK_OPERATION_DEFERRED_KHR"; case VkResult::VK_OPERATION_NOT_DEFERRED_KHR: return "VK_OPERATION_NOT_DEFERRED_KHR"; + case VkResult::VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR: + return "VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR"; case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT: return "VK_PIPELINE_COMPILE_REQUIRED_EXT"; case VkResult::VK_RESULT_MAX_ENUM: diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index fd6bebf0f..0836bcb7e 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -106,32 +106,30 @@ ConfigureGraphics::ConfigureGraphics( Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue())); UpdateAPILayout(); - PopulateVSyncModeSelection(); //< must happen after UpdateAPILayout + PopulateVSyncModeSelection(false); //< must happen after UpdateAPILayout // VSync setting needs to be determined after populating the VSync combobox - if (Settings::IsConfiguringGlobal()) { - const auto vsync_mode_setting = Settings::values.vsync_mode.GetValue(); - const auto vsync_mode = VSyncSettingToMode(vsync_mode_setting); - int index{}; - for (const auto mode : vsync_mode_combobox_enum_map) { - if (mode == vsync_mode) { - break; - } - index++; - } - if (static_cast<unsigned long>(index) < vsync_mode_combobox_enum_map.size()) { - vsync_mode_combobox->setCurrentIndex(index); + const auto vsync_mode_setting = Settings::values.vsync_mode.GetValue(); + const auto vsync_mode = VSyncSettingToMode(vsync_mode_setting); + int index{}; + for (const auto mode : vsync_mode_combobox_enum_map) { + if (mode == vsync_mode) { + break; } + index++; + } + if (static_cast<unsigned long>(index) < vsync_mode_combobox_enum_map.size()) { + vsync_mode_combobox->setCurrentIndex(index); } connect(api_combobox, qOverload<int>(&QComboBox::activated), this, [this] { UpdateAPILayout(); - PopulateVSyncModeSelection(); + PopulateVSyncModeSelection(false); }); connect(vulkan_device_combobox, qOverload<int>(&QComboBox::activated), this, [this](int device) { UpdateDeviceSelection(device); - PopulateVSyncModeSelection(); + PopulateVSyncModeSelection(false); }); connect(shader_backend_combobox, qOverload<int>(&QComboBox::activated), this, [this](int backend) { UpdateShaderBackendSelection(backend); }); @@ -147,8 +145,9 @@ ConfigureGraphics::ConfigureGraphics( const auto& update_screenshot_info = [this, &builder]() { const auto& combobox_enumerations = builder.ComboboxTranslations().at( Settings::EnumMetadata<Settings::AspectRatio>::Index()); - const auto index = aspect_ratio_combobox->currentIndex(); - const auto ratio = static_cast<Settings::AspectRatio>(combobox_enumerations[index].first); + const auto ratio_index = aspect_ratio_combobox->currentIndex(); + const auto ratio = + static_cast<Settings::AspectRatio>(combobox_enumerations[ratio_index].first); const auto& combobox_enumerations_resolution = builder.ComboboxTranslations().at( Settings::EnumMetadata<Settings::ResolutionSetup>::Index()); @@ -174,11 +173,7 @@ ConfigureGraphics::ConfigureGraphics( } } -void ConfigureGraphics::PopulateVSyncModeSelection() { - if (!Settings::IsConfiguringGlobal()) { - return; - } - +void ConfigureGraphics::PopulateVSyncModeSelection(bool use_setting) { const Settings::RendererBackend backend{GetCurrentGraphicsBackend()}; if (backend == Settings::RendererBackend::Null) { vsync_mode_combobox->setEnabled(false); @@ -189,8 +184,9 @@ void ConfigureGraphics::PopulateVSyncModeSelection() { const int current_index = //< current selected vsync mode from combobox vsync_mode_combobox->currentIndex(); const auto current_mode = //< current selected vsync mode as a VkPresentModeKHR - current_index == -1 ? VSyncSettingToMode(Settings::values.vsync_mode.GetValue()) - : vsync_mode_combobox_enum_map[current_index]; + current_index == -1 || use_setting + ? VSyncSettingToMode(Settings::values.vsync_mode.GetValue()) + : vsync_mode_combobox_enum_map[current_index]; int index{}; const int device{vulkan_device_combobox->currentIndex()}; //< current selected Vulkan device @@ -214,6 +210,23 @@ void ConfigureGraphics::PopulateVSyncModeSelection() { } index++; } + + if (!Settings::IsConfiguringGlobal()) { + vsync_restore_global_button->setVisible(!Settings::values.vsync_mode.UsingGlobal()); + + const Settings::VSyncMode global_vsync_mode = Settings::values.vsync_mode.GetValue(true); + vsync_restore_global_button->setEnabled( + (backend == Settings::RendererBackend::OpenGL && + (global_vsync_mode == Settings::VSyncMode::Immediate || + global_vsync_mode == Settings::VSyncMode::Fifo)) || + backend == Settings::RendererBackend::Vulkan); + } +} + +void ConfigureGraphics::UpdateVsyncSetting() const { + const auto mode = vsync_mode_combobox_enum_map[vsync_mode_combobox->currentIndex()]; + const auto vsync_mode = PresentModeToSetting(mode); + Settings::values.vsync_mode.SetValue(vsync_mode); } void ConfigureGraphics::UpdateDeviceSelection(int device) { @@ -299,6 +312,33 @@ void ConfigureGraphics::Setup(const ConfigurationShared::Builder& builder) { } else if (setting->Id() == Settings::values.vsync_mode.Id()) { // Keep track of vsync_mode's combobox so we can populate it vsync_mode_combobox = widget->combobox; + + // Since vsync is populated at runtime, we have to manually set up the button for + // restoring the global setting. + if (!Settings::IsConfiguringGlobal()) { + QPushButton* restore_button = + ConfigurationShared::Widget::CreateRestoreGlobalButton( + Settings::values.vsync_mode.UsingGlobal(), widget); + restore_button->setEnabled(true); + widget->layout()->addWidget(restore_button); + + QObject::connect(restore_button, &QAbstractButton::clicked, + [restore_button, this](bool) { + Settings::values.vsync_mode.SetGlobal(true); + PopulateVSyncModeSelection(true); + + restore_button->setVisible(false); + }); + + std::function<void()> set_non_global = [restore_button, this]() { + Settings::values.vsync_mode.SetGlobal(false); + UpdateVsyncSetting(); + restore_button->setVisible(true); + }; + QObject::connect(widget->combobox, QOverload<int>::of(&QComboBox::activated), + [set_non_global]() { set_non_global(); }); + vsync_restore_global_button = restore_button; + } hold_graphics.emplace(setting->Id(), widget); } else if (setting->Id() == Settings::values.aspect_ratio.Id()) { // Keep track of the aspect ratio combobox to update other UI tabs that need it @@ -400,11 +440,7 @@ void ConfigureGraphics::ApplyConfiguration() { func(powered_on); } - if (Settings::IsConfiguringGlobal()) { - const auto mode = vsync_mode_combobox_enum_map[vsync_mode_combobox->currentIndex()]; - const auto vsync_mode = PresentModeToSetting(mode); - Settings::values.vsync_mode.SetValue(vsync_mode); - } + UpdateVsyncSetting(); Settings::values.vulkan_device.SetGlobal(true); Settings::values.shader_backend.SetGlobal(true); diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 9c24a56db..5c8286836 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -62,7 +62,8 @@ private: void Setup(const ConfigurationShared::Builder& builder); - void PopulateVSyncModeSelection(); + void PopulateVSyncModeSelection(bool use_setting); + void UpdateVsyncSetting() const; void UpdateBackgroundColorButton(QColor color); void UpdateAPILayout(); void UpdateDeviceSelection(int device); @@ -104,6 +105,7 @@ private: QComboBox* api_combobox; QComboBox* shader_backend_combobox; QComboBox* vsync_mode_combobox; + QPushButton* vsync_restore_global_button; QWidget* vulkan_device_widget; QWidget* api_widget; QWidget* shader_backend_widget; |