72 files changed, 1863 insertions, 640 deletions
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/GamesFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/GamesFragment.kt
index fc0eeb9ad..54380323e 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/GamesFragment.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/GamesFragment.kt
@@ -91,18 +91,20 @@ class GamesFragment : Fragment() {
         viewLifecycleOwner.lifecycleScope.apply {
             launch {
                 repeatOnLifecycle(Lifecycle.State.RESUMED) {
-                    gamesViewModel.isReloading.collect { binding.swipeRefresh.isRefreshing = it }
+                    gamesViewModel.isReloading.collect {
+                        binding.swipeRefresh.isRefreshing = it
+                        if (gamesViewModel.games.value.isEmpty() && !it) {
+                            binding.noticeText.visibility = View.VISIBLE
+                        } else {
+                            binding.noticeText.visibility = View.INVISIBLE
+                        }
+                    }
                 }
             }
             launch {
                 repeatOnLifecycle(Lifecycle.State.RESUMED) {
                     gamesViewModel.games.collectLatest {
                         (binding.gridGames.adapter as GameAdapter).submitList(it)
-                        if (it.isEmpty()) {
-                            binding.noticeText.visibility = View.VISIBLE
-                        } else {
-                            binding.noticeText.visibility = View.GONE
-                        }
                     }
                 }
             }
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index 698c9c8ad..5dc7e5d59 100644
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -9,7 +9,7 @@
 
 namespace Core {
 
-void ArmInterface::LogBacktrace(const Kernel::KProcess* process) const {
+void ArmInterface::LogBacktrace(Kernel::KProcess* process) const {
     Kernel::Svc::ThreadContext ctx;
     this->GetContext(ctx);
 
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 806c7c9e9..495963eef 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -95,7 +95,7 @@ public:
     virtual void SignalInterrupt(Kernel::KThread* thread) = 0;
 
     // Stack trace generation.
-    void LogBacktrace(const Kernel::KProcess* process) const;
+    void LogBacktrace(Kernel::KProcess* process) const;
 
     // Debug functionality.
     virtual const Kernel::DebugWatchpoint* HaltedWatchpoint() const = 0;
diff --git a/src/core/arm/debug.cpp b/src/core/arm/debug.cpp
index af1c34bc3..854509463 100644
--- a/src/core/arm/debug.cpp
+++ b/src/core/arm/debug.cpp
@@ -79,7 +79,7 @@ constexpr std::array<u64, 2> SegmentBases{
     0x7100000000ULL,
 };
 
-void SymbolicateBacktrace(const Kernel::KProcess* process, std::vector<BacktraceEntry>& out) {
+void SymbolicateBacktrace(Kernel::KProcess* process, std::vector<BacktraceEntry>& out) {
     auto modules = FindModules(process);
 
     const bool is_64 = process->Is64Bit();
@@ -118,7 +118,7 @@ void SymbolicateBacktrace(const Kernel::KProcess* process, std::vector<Backtrace
     }
 }
 
-std::vector<BacktraceEntry> GetAArch64Backtrace(const Kernel::KProcess* process,
+std::vector<BacktraceEntry> GetAArch64Backtrace(Kernel::KProcess* process,
                                                 const Kernel::Svc::ThreadContext& ctx) {
     std::vector<BacktraceEntry> out;
     auto& memory = process->GetMemory();
@@ -144,7 +144,7 @@ std::vector<BacktraceEntry> GetAArch64Backtrace(const Kernel::KProcess* process,
     return out;
 }
 
-std::vector<BacktraceEntry> GetAArch32Backtrace(const Kernel::KProcess* process,
+std::vector<BacktraceEntry> GetAArch32Backtrace(Kernel::KProcess* process,
                                                 const Kernel::Svc::ThreadContext& ctx) {
     std::vector<BacktraceEntry> out;
     auto& memory = process->GetMemory();
@@ -173,7 +173,7 @@ std::vector<BacktraceEntry> GetAArch32Backtrace(const Kernel::KProcess* process,
 } // namespace
 
 std::optional<std::string> GetThreadName(const Kernel::KThread* thread) {
-    const auto* process = thread->GetOwnerProcess();
+    auto* process = thread->GetOwnerProcess();
     if (process->Is64Bit()) {
         return GetNameFromThreadType64(process->GetMemory(), *thread);
     } else {
@@ -248,7 +248,7 @@ Kernel::KProcessAddress GetModuleEnd(const Kernel::KProcess* process,
     return cur_addr - 1;
 }
 
-Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process) {
+Loader::AppLoader::Modules FindModules(Kernel::KProcess* process) {
     Loader::AppLoader::Modules modules;
 
     auto& page_table = process->GetPageTable();
@@ -312,7 +312,7 @@ Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process) {
     return modules;
 }
 
-Kernel::KProcessAddress FindMainModuleEntrypoint(const Kernel::KProcess* process) {
+Kernel::KProcessAddress FindMainModuleEntrypoint(Kernel::KProcess* process) {
     // Do we have any loaded executable sections?
     auto modules = FindModules(process);
 
@@ -337,7 +337,7 @@ void InvalidateInstructionCacheRange(const Kernel::KProcess* process, u64 addres
     }
 }
 
-std::vector<BacktraceEntry> GetBacktraceFromContext(const Kernel::KProcess* process,
+std::vector<BacktraceEntry> GetBacktraceFromContext(Kernel::KProcess* process,
                                                     const Kernel::Svc::ThreadContext& ctx) {
     if (process->Is64Bit()) {
         return GetAArch64Backtrace(process, ctx);
diff --git a/src/core/arm/debug.h b/src/core/arm/debug.h
index c542633db..3cd671365 100644
--- a/src/core/arm/debug.h
+++ b/src/core/arm/debug.h
@@ -14,9 +14,9 @@ std::optional<std::string> GetThreadName(const Kernel::KThread* thread);
 std::string_view GetThreadWaitReason(const Kernel::KThread* thread);
 std::string GetThreadState(const Kernel::KThread* thread);
 
-Loader::AppLoader::Modules FindModules(const Kernel::KProcess* process);
+Loader::AppLoader::Modules FindModules(Kernel::KProcess* process);
 Kernel::KProcessAddress GetModuleEnd(const Kernel::KProcess* process, Kernel::KProcessAddress base);
-Kernel::KProcessAddress FindMainModuleEntrypoint(const Kernel::KProcess* process);
+Kernel::KProcessAddress FindMainModuleEntrypoint(Kernel::KProcess* process);
 
 void InvalidateInstructionCacheRange(const Kernel::KProcess* process, u64 address, u64 size);
 
@@ -28,7 +28,7 @@ struct BacktraceEntry {
     std::string name;
 };
 
-std::vector<BacktraceEntry> GetBacktraceFromContext(const Kernel::KProcess* process,
+std::vector<BacktraceEntry> GetBacktraceFromContext(Kernel::KProcess* process,
                                                     const Kernel::Svc::ThreadContext& ctx);
 std::vector<BacktraceEntry> GetBacktrace(const Kernel::KThread* thread);
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index f34865e26..c78cfd528 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -15,7 +15,7 @@ using namespace Common::Literals;
 
 class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
 public:
-    explicit DynarmicCallbacks32(ArmDynarmic32& parent, const Kernel::KProcess* process)
+    explicit DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process)
         : m_parent{parent}, m_memory(process->GetMemory()),
           m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()},
           m_check_memory_access{m_debugger_enabled ||
@@ -169,7 +169,7 @@ public:
 
     ArmDynarmic32& m_parent;
     Core::Memory::Memory& m_memory;
-    const Kernel::KProcess* m_process{};
+    Kernel::KProcess* m_process{};
     const bool m_debugger_enabled{};
     const bool m_check_memory_access{};
     static constexpr u64 MinimumRunCycles = 10000U;
@@ -370,7 +370,7 @@ void ArmDynarmic32::RewindBreakpointInstruction() {
     this->SetContext(m_breakpoint_context);
 }
 
-ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process,
                              DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index)
     : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor},
       m_cb(std::make_unique<DynarmicCallbacks32>(*this, process)),
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 185ac7cbf..b580efe61 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -20,7 +20,7 @@ class System;
 
 class ArmDynarmic32 final : public ArmInterface {
 public:
-    ArmDynarmic32(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+    ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process,
                   DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ArmDynarmic32() override;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index dff14756e..f351b13d9 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -15,7 +15,7 @@ using namespace Common::Literals;
 
 class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
 public:
-    explicit DynarmicCallbacks64(ArmDynarmic64& parent, const Kernel::KProcess* process)
+    explicit DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process)
         : m_parent{parent}, m_memory(process->GetMemory()),
           m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()},
           m_check_memory_access{m_debugger_enabled ||
@@ -216,7 +216,7 @@ public:
     Core::Memory::Memory& m_memory;
     u64 m_tpidrro_el0{};
     u64 m_tpidr_el0{};
-    const Kernel::KProcess* m_process{};
+    Kernel::KProcess* m_process{};
     const bool m_debugger_enabled{};
     const bool m_check_memory_access{};
     static constexpr u64 MinimumRunCycles = 10000U;
@@ -399,7 +399,7 @@ void ArmDynarmic64::RewindBreakpointInstruction() {
     this->SetContext(m_breakpoint_context);
 }
 
-ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process,
                              DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index)
     : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor},
       m_cb(std::make_unique<DynarmicCallbacks64>(*this, process)), m_core_index{core_index} {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 4f3dd026f..08cd982b3 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -25,7 +25,7 @@ class System;
 
 class ArmDynarmic64 final : public ArmInterface {
 public:
-    ArmDynarmic64(System& system, bool uses_wall_clock, const Kernel::KProcess* process,
+    ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process,
                   DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ArmDynarmic64() override;
 
diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp
index 1311e66a9..123b3da7e 100644
--- a/src/core/arm/nce/arm_nce.cpp
+++ b/src/core/arm/nce/arm_nce.cpp
@@ -39,7 +39,7 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
 }
 
 using namespace Common::Literals;
-constexpr u32 StackSize = 32_KiB;
+constexpr u32 StackSize = 128_KiB;
 
 } // namespace
 
diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp
index 8e81c66a5..def888d15 100644
--- a/src/core/arm/nce/interpreter_visitor.cpp
+++ b/src/core/arm/nce/interpreter_visitor.cpp
@@ -5,8 +5,6 @@
 #include "common/bit_cast.h"
 #include "core/arm/nce/interpreter_visitor.h"
 
-#include <dynarmic/frontend/A64/decoder/a64.h>
-
 namespace Core {
 
 template <u32 BitSize>
@@ -249,6 +247,7 @@ bool InterpreterVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) {
         return false;
     }
 
+    // Size in bytes
     const u64 size = 4 << opc.ZeroExtend();
     const u64 offset = imm19.SignExtend<u64>() << 2;
     const u64 address = this->GetPc() + offset;
@@ -530,7 +529,7 @@ bool InterpreterVisitor::SIMDImmediate(bool wback, bool postindex, size_t scale,
     }
     case MemOp::Load: {
         u128 data{};
-        m_memory.ReadBlock(address, &data, datasize);
+        m_memory.ReadBlock(address, &data, datasize / 8);
         this->SetVec(Vt, data);
         break;
     }
diff --git a/src/core/arm/nce/visitor_base.h b/src/core/arm/nce/visitor_base.h
index 8fb032912..6a2be3d9b 100644
--- a/src/core/arm/nce/visitor_base.h
+++ b/src/core/arm/nce/visitor_base.h
@@ -4,9 +4,15 @@
 
 #pragma once
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
 #include <dynarmic/frontend/A64/a64_types.h>
+#include <dynarmic/frontend/A64/decoder/a64.h>
 #include <dynarmic/frontend/imm.h>
 
+#pragma GCC diagnostic pop
+
 namespace Core {
 
 class VisitorBase {
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b14f74976..66f444d39 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -28,7 +28,6 @@
 #include "core/file_sys/savedata_factory.h"
 #include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_real.h"
-#include "core/gpu_dirty_memory_manager.h"
 #include "core/hid/hid_core.h"
 #include "core/hle/kernel/k_memory_manager.h"
 #include "core/hle/kernel/k_process.h"
@@ -130,11 +129,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
 
 struct System::Impl {
     explicit Impl(System& system)
-        : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
-          cpu_manager{system}, reporter{system}, applet_manager{system}, profile_manager{},
-          time_manager{system}, gpu_dirty_memory_write_manager{} {
-        memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
-    }
+        : kernel{system}, fs_controller{system}, hid_core{}, room_network{}, cpu_manager{system},
+          reporter{system}, applet_manager{system}, profile_manager{}, time_manager{system} {}
 
     void Initialize(System& system) {
         device_memory = std::make_unique<Core::DeviceMemory>();
@@ -241,17 +237,17 @@ struct System::Impl {
         debugger = std::make_unique<Debugger>(system, port);
     }
 
-    SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) {
+    void InitializeKernel(System& system) {
         LOG_DEBUG(Core, "initialized OK");
 
         // Setting changes may require a full system reinitialization (e.g., disabling multicore).
         ReinitializeIfNecessary(system);
 
-        memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
-
         kernel.Initialize();
         cpu_manager.Initialize();
+    }
 
+    SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) {
         /// Reset all glue registrations
         arp_manager.ResetAll();
 
@@ -300,17 +296,9 @@ struct System::Impl {
             return SystemResultStatus::ErrorGetLoader;
         }
 
-        SystemResultStatus init_result{SetupForApplicationProcess(system, emu_window)};
-        if (init_result != SystemResultStatus::Success) {
-            LOG_CRITICAL(Core, "Failed to initialize system (Error {})!",
-                         static_cast<int>(init_result));
-            ShutdownMainProcess();
-            return init_result;
-        }
-
-        telemetry_session->AddInitialInfo(*app_loader, fs_controller, *content_provider);
+        InitializeKernel(system);
 
-        // Create the process.
+        // Create the application process.
         auto main_process = Kernel::KProcess::Create(system.Kernel());
         Kernel::KProcess::Register(system.Kernel(), main_process);
         kernel.AppendNewProcess(main_process);
@@ -323,7 +311,18 @@ struct System::Impl {
             return static_cast<SystemResultStatus>(
                 static_cast<u32>(SystemResultStatus::ErrorLoader) + static_cast<u32>(load_result));
         }
+
+        // Set up the rest of the system.
+        SystemResultStatus init_result{SetupForApplicationProcess(system, emu_window)};
+        if (init_result != SystemResultStatus::Success) {
+            LOG_CRITICAL(Core, "Failed to initialize system (Error {})!",
+                         static_cast<int>(init_result));
+            ShutdownMainProcess();
+            return init_result;
+        }
+
         AddGlueRegistrationForProcess(*app_loader, *main_process);
+        telemetry_session->AddInitialInfo(*app_loader, fs_controller, *content_provider);
 
         // Initialize cheat engine
         if (cheat_engine) {
@@ -426,7 +425,6 @@ struct System::Impl {
         cpu_manager.Shutdown();
         debugger.reset();
         kernel.Shutdown();
-        memory.Reset();
         Network::RestartSocketOperations();
 
         if (auto room_member = room_network.GetRoomMember().lock()) {
@@ -507,7 +505,6 @@ struct System::Impl {
     std::unique_ptr<Tegra::Host1x::Host1x> host1x_core;
     std::unique_ptr<Core::DeviceMemory> device_memory;
     std::unique_ptr<AudioCore::AudioCore> audio_core;
-    Core::Memory::Memory memory;
     Core::HID::HIDCore hid_core;
     Network::RoomNetwork room_network;
 
@@ -567,9 +564,6 @@ struct System::Impl {
     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
     std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
 
-    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
-        gpu_dirty_memory_write_manager{};
-
     std::deque<std::vector<u8>> user_channel;
 };
 
@@ -652,29 +646,12 @@ void System::PrepareReschedule(const u32 core_index) {
     impl->kernel.PrepareReschedule(core_index);
 }
 
-Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() {
-    const std::size_t core = impl->kernel.GetCurrentHostThreadID();
-    return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
-                                                    ? core
-                                                    : Core::Hardware::NUM_CPU_CORES - 1];
-}
-
-/// Provides a constant reference to the current gou dirty memory manager.
-const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const {
-    const std::size_t core = impl->kernel.GetCurrentHostThreadID();
-    return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
-                                                    ? core
-                                                    : Core::Hardware::NUM_CPU_CORES - 1];
-}
-
 size_t System::GetCurrentHostThreadID() const {
     return impl->kernel.GetCurrentHostThreadID();
 }
 
 void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
-    for (auto& manager : impl->gpu_dirty_memory_write_manager) {
-        manager.Gather(callback);
-    }
+    this->ApplicationProcess()->GatherGPUDirtyMemory(callback);
 }
 
 PerfStatsResults System::GetAndResetPerfStats() {
@@ -723,20 +700,12 @@ const Kernel::KProcess* System::ApplicationProcess() const {
     return impl->kernel.ApplicationProcess();
 }
 
-ExclusiveMonitor& System::Monitor() {
-    return impl->kernel.GetExclusiveMonitor();
-}
-
-const ExclusiveMonitor& System::Monitor() const {
-    return impl->kernel.GetExclusiveMonitor();
-}
-
 Memory::Memory& System::ApplicationMemory() {
-    return impl->memory;
+    return impl->kernel.ApplicationProcess()->GetMemory();
 }
 
 const Core::Memory::Memory& System::ApplicationMemory() const {
-    return impl->memory;
+    return impl->kernel.ApplicationProcess()->GetMemory();
 }
 
 Tegra::GPU& System::GPU() {
diff --git a/src/core/core.h b/src/core/core.h
index 473204db7..ba5add0dc 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -116,7 +116,6 @@ class CpuManager;
 class Debugger;
 class DeviceMemory;
 class ExclusiveMonitor;
-class GPUDirtyMemoryManager;
 class PerfStats;
 class Reporter;
 class SpeedLimiter;
@@ -225,12 +224,6 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule(u32 core_index);
 
-    /// Provides a reference to the gou dirty memory manager.
-    [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager();
-
-    /// Provides a constant reference to the current gou dirty memory manager.
-    [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const;
-
     void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
 
     [[nodiscard]] size_t GetCurrentHostThreadID() const;
@@ -250,12 +243,6 @@ public:
     /// Gets a const reference to the underlying CPU manager
     [[nodiscard]] const CpuManager& GetCpuManager() const;
 
-    /// Gets a reference to the exclusive monitor
-    [[nodiscard]] ExclusiveMonitor& Monitor();
-
-    /// Gets a constant reference to the exclusive monitor
-    [[nodiscard]] const ExclusiveMonitor& Monitor() const;
-
     /// Gets a mutable reference to the system memory instance.
     [[nodiscard]] Core::Memory::Memory& ApplicationMemory();
 
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index 763a44fee..539c7f7af 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -166,6 +166,10 @@ u32 ProgramMetadata::GetSystemResourceSize() const {
     return npdm_header.system_resource_size;
 }
 
+PoolPartition ProgramMetadata::GetPoolPartition() const {
+    return acid_header.pool_partition;
+}
+
 const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
     return aci_kernel_capabilities;
 }
@@ -201,7 +205,7 @@ void ProgramMetadata::Print() const {
     // Begin ACID printing (potential perms, signed)
     LOG_DEBUG(Service_FS, "Magic:                  {:.4}", acid_header.magic.data());
     LOG_DEBUG(Service_FS, "Flags:                  0x{:02X}", acid_header.flags);
-    LOG_DEBUG(Service_FS, " > Is Retail:           {}", acid_header.is_retail ? "YES" : "NO");
+    LOG_DEBUG(Service_FS, " > Is Retail:           {}", acid_header.production_flag ? "YES" : "NO");
     LOG_DEBUG(Service_FS, "Title ID Min:           0x{:016X}", acid_header.title_id_min);
     LOG_DEBUG(Service_FS, "Title ID Max:           0x{:016X}", acid_header.title_id_max);
     LOG_DEBUG(Service_FS, "Filesystem Access:      0x{:016X}\n", acid_file_access.permissions);
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 76ee97d78..a53092b87 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -34,6 +34,13 @@ enum class ProgramFilePermission : u64 {
     Everything = 1ULL << 63,
 };
 
+enum class PoolPartition : u32 { // Read from bits 2-5 of the ACID header flags word.
+    Application = 0,
+    Applet = 1,
+    System = 2,
+    SystemNonSecure = 3,
+};
+
 /**
  * Helper which implements an interface to parse Program Description Metadata (NPDM)
  * Data can either be loaded from a file path or with data and an offset into it.
@@ -72,6 +79,7 @@ public:
     u64 GetTitleID() const;
     u64 GetFilesystemPermissions() const;
     u32 GetSystemResourceSize() const;
+    PoolPartition GetPoolPartition() const;
     const KernelCapabilityDescriptors& GetKernelCapabilities() const;
     const std::array<u8, 0x10>& GetName() const {
         return npdm_header.application_name;
@@ -116,8 +124,9 @@ private:
         union {
             u32 flags;
 
-            BitField<0, 1, u32> is_retail;
-            BitField<1, 31, u32> flags_unk;
+            BitField<0, 1, u32> production_flag;
+            BitField<1, 1, u32> unqualified_approval;
+            BitField<2, 4, PoolPartition> pool_partition;
         };
         u64_le title_id_min;
         u64_le title_id_max;
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index 78d43d729..48889253d 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -4,6 +4,7 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_address_arbiter.h"
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
 #include "core/hle/kernel/k_thread.h"
@@ -26,9 +27,9 @@ bool ReadFromUser(KernelCore& kernel, s32* out, KProcessAddress address) {
     return true;
 }
 
-bool DecrementIfLessThan(Core::System& system, s32* out, KProcessAddress address, s32 value) {
-    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+bool DecrementIfLessThan(KernelCore& kernel, s32* out, KProcessAddress address, s32 value) {
+    auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor();
+    const auto current_core = kernel.CurrentPhysicalCoreIndex();
 
     // NOTE: If scheduler lock is not held here, interrupt disable is required.
     // KScopedInterruptDisable di;
@@ -66,10 +67,10 @@ bool DecrementIfLessThan(Core::System& system, s32* out, KProcessAddress address
     return true;
 }
 
-bool UpdateIfEqual(Core::System& system, s32* out, KProcessAddress address, s32 value,
+bool UpdateIfEqual(KernelCore& kernel, s32* out, KProcessAddress address, s32 value,
                    s32 new_value) {
-    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+    auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor();
+    const auto current_core = kernel.CurrentPhysicalCoreIndex();
 
     // NOTE: If scheduler lock is not held here, interrupt disable is required.
     // KScopedInterruptDisable di;
@@ -159,7 +160,7 @@ Result KAddressArbiter::SignalAndIncrementIfEqual(uint64_t addr, s32 value, s32
 
         // Check the userspace value.
         s32 user_value{};
-        R_UNLESS(UpdateIfEqual(m_system, std::addressof(user_value), addr, value, value + 1),
+        R_UNLESS(UpdateIfEqual(m_kernel, std::addressof(user_value), addr, value, value + 1),
                  ResultInvalidCurrentMemory);
         R_UNLESS(user_value == value, ResultInvalidState);
 
@@ -219,7 +220,7 @@ Result KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(uint64_t addr, s32
         s32 user_value{};
         bool succeeded{};
         if (value != new_value) {
-            succeeded = UpdateIfEqual(m_system, std::addressof(user_value), addr, value, new_value);
+            succeeded = UpdateIfEqual(m_kernel, std::addressof(user_value), addr, value, new_value);
         } else {
             succeeded = ReadFromUser(m_kernel, std::addressof(user_value), addr);
         }
@@ -262,7 +263,7 @@ Result KAddressArbiter::WaitIfLessThan(uint64_t addr, s32 value, bool decrement,
         s32 user_value{};
         bool succeeded{};
         if (decrement) {
-            succeeded = DecrementIfLessThan(m_system, std::addressof(user_value), addr, value);
+            succeeded = DecrementIfLessThan(m_kernel, std::addressof(user_value), addr, value);
         } else {
             succeeded = ReadFromUser(m_kernel, std::addressof(user_value), addr);
         }
diff --git a/src/core/hle/kernel/k_client_port.cpp b/src/core/hle/kernel/k_client_port.cpp
index 11b1b977e..68cea978a 100644
--- a/src/core/hle/kernel/k_client_port.cpp
+++ b/src/core/hle/kernel/k_client_port.cpp
@@ -58,9 +58,8 @@ Result KClientPort::CreateSession(KClientSession** out) {
     KSession* session{};
 
     // Reserve a new session from the resource limit.
-    //! FIXME: we are reserving this from the wrong resource limit!
-    KScopedResourceReservation session_reservation(
-        m_kernel.ApplicationProcess()->GetResourceLimit(), LimitableResource::SessionCountMax);
+    KScopedResourceReservation session_reservation(GetCurrentProcessPointer(m_kernel),
+                                                   LimitableResource::SessionCountMax);
     R_UNLESS(session_reservation.Succeeded(), ResultLimitReached);
 
     // Allocate a session normally.
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index 7633a51fb..94ea3527a 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -28,10 +28,10 @@ bool WriteToUser(KernelCore& kernel, KProcessAddress address, const u32* p) {
     return true;
 }
 
-bool UpdateLockAtomic(Core::System& system, u32* out, KProcessAddress address, u32 if_zero,
+bool UpdateLockAtomic(KernelCore& kernel, u32* out, KProcessAddress address, u32 if_zero,
                       u32 new_orr_mask) {
-    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+    auto& monitor = GetCurrentProcess(kernel).GetExclusiveMonitor();
+    const auto current_core = kernel.CurrentPhysicalCoreIndex();
 
     u32 expected{};
 
@@ -208,7 +208,7 @@ void KConditionVariable::SignalImpl(KThread* thread) {
         // TODO(bunnei): We should call CanAccessAtomic(..) here.
         can_access = true;
         if (can_access) [[likely]] {
-            UpdateLockAtomic(m_system, std::addressof(prev_tag), address, own_tag,
+            UpdateLockAtomic(m_kernel, std::addressof(prev_tag), address, own_tag,
                              Svc::HandleWaitMask);
         }
     }
diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h
index d7660630c..4e6dcd66b 100644
--- a/src/core/hle/kernel/k_handle_table.h
+++ b/src/core/hle/kernel/k_handle_table.h
@@ -30,7 +30,7 @@ public:
 public:
     explicit KHandleTable(KernelCore& kernel) : m_kernel(kernel) {}
 
-    Result Initialize(s32 size) {
+    Result Initialize(KProcess* owner, s32 size) {
         // Check that the table size is valid.
         R_UNLESS(size <= static_cast<s32>(MaxTableSize), ResultOutOfMemory);
 
@@ -44,6 +44,7 @@ public:
         m_next_linear_id = MinLinearId;
         m_count = 0;
         m_free_head_index = -1;
+        m_owner = owner;
 
         // Free all entries.
         for (s32 i = 0; i < static_cast<s32>(m_table_size); ++i) {
@@ -90,8 +91,8 @@ public:
         // Handle pseudo-handles.
         if constexpr (std::derived_from<KProcess, T>) {
             if (handle == Svc::PseudoHandle::CurrentProcess) {
-                //! FIXME: this is the wrong process!
-                auto* const cur_process = m_kernel.ApplicationProcess();
+                // TODO: this should be the current process
+                auto* const cur_process = m_owner;
                 ASSERT(cur_process != nullptr);
                 return cur_process;
             }
@@ -301,6 +302,7 @@ private:
 
 private:
     KernelCore& m_kernel;
+    KProcess* m_owner{};
     std::array<EntryInfo, MaxTableSize> m_entry_infos{};
     std::array<KAutoObject*, MaxTableSize> m_objects{};
     mutable KSpinLock m_lock;
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 3a2635e1f..d6869c228 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -306,12 +306,16 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
             False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
         R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
                                       params.code_address, params.code_num_pages * PageSize,
-                                      m_system_resource, res_limit, this->GetMemory(), 0));
+                                      m_system_resource, res_limit, m_memory, 0));
     }
     ON_RESULT_FAILURE_2 {
         m_page_table.Finalize();
     };
 
+    // Ensure our memory is initialized.
+    m_memory.SetCurrentPageTable(*this);
+    m_memory.SetGPUDirtyManagers(m_dirty_memory_managers);
+
     // Ensure we can insert the code region.
     R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize,
                                      KMemoryState::Code),
@@ -399,12 +403,16 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
             False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
         R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
                                       params.code_address, code_size, m_system_resource, res_limit,
-                                      this->GetMemory(), aslr_space_start));
+                                      m_memory, aslr_space_start));
     }
     ON_RESULT_FAILURE_2 {
         m_page_table.Finalize();
     };
 
+    // Ensure our memory is initialized.
+    m_memory.SetCurrentPageTable(*this);
+    m_memory.SetGPUDirtyManagers(m_dirty_memory_managers);
+
     // Ensure we can insert the code region.
     R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code),
              ResultInvalidMemoryRegion);
@@ -1094,8 +1102,7 @@ void KProcess::UnpinThread(KThread* thread) {
 
 Result KProcess::GetThreadList(s32* out_num_threads, KProcessAddress out_thread_ids,
                                s32 max_out_count) {
-    // TODO: use current memory reference
-    auto& memory = m_kernel.System().ApplicationMemory();
+    auto& memory = this->GetMemory();
 
     // Lock the list.
     KScopedLightLock lk(m_list_lock);
@@ -1128,14 +1135,15 @@ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {}
 KProcess::KProcess(KernelCore& kernel)
     : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel},
       m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()},
-      m_handle_table{kernel} {}
+      m_handle_table{kernel}, m_dirty_memory_managers{},
+      m_exclusive_monitor{}, m_memory{kernel.System()} {}
 KProcess::~KProcess() = default;
 
 Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
                                   KProcessAddress aslr_space_start, bool is_hbl) {
     // Create a resource limit for the process.
-    const auto physical_memory_size =
-        m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application);
+    const auto pool = static_cast<KMemoryManager::Pool>(metadata.GetPoolPartition());
+    const auto physical_memory_size = m_kernel.MemoryManager().GetSize(pool);
     auto* res_limit =
         Kernel::CreateResourceLimitForProcess(m_kernel.System(), physical_memory_size);
 
@@ -1146,8 +1154,10 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
     Svc::CreateProcessFlag flag{};
     u64 code_address{};
 
-    // We are an application.
-    flag |= Svc::CreateProcessFlag::IsApplication;
+    // Determine if we are an application.
+    if (pool == KMemoryManager::Pool::Application) {
+        flag |= Svc::CreateProcessFlag::IsApplication;
+    }
 
     // If we are 64-bit, create as such.
     if (metadata.Is64BitProgram()) {
@@ -1196,8 +1206,8 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
     std::memcpy(params.name.data(), name.data(), sizeof(params.name));
 
     // Initialize for application process.
-    R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit,
-                           KMemoryManager::Pool::Application, aslr_space_start));
+    R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit, pool,
+                           aslr_space_start));
 
     // Assign remaining properties.
     m_is_hbl = is_hbl;
@@ -1223,7 +1233,7 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
     ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite);
 
 #ifdef HAS_NCE
-    if (Settings::IsNceEnabled()) {
+    if (this->IsApplication() && Settings::IsNceEnabled()) {
         auto& buffer = m_kernel.System().DeviceMemory().buffer;
         const auto& code = code_set.CodeSegment();
         const auto& patch = code_set.PatchSegment();
@@ -1235,10 +1245,11 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
 }
 
 void KProcess::InitializeInterfaces() {
-    this->GetMemory().SetCurrentPageTable(*this);
+    m_exclusive_monitor =
+        Core::MakeExclusiveMonitor(this->GetMemory(), Core::Hardware::NUM_CPU_CORES);
 
 #ifdef HAS_NCE
-    if (this->Is64Bit() && Settings::IsNceEnabled()) {
+    if (this->IsApplication() && Settings::IsNceEnabled()) {
         for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i);
         }
@@ -1248,13 +1259,13 @@ void KProcess::InitializeInterfaces() {
         for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             m_arm_interfaces[i] = std::make_unique<Core::ArmDynarmic64>(
                 m_kernel.System(), m_kernel.IsMulticore(), this,
-                static_cast<Core::DynarmicExclusiveMonitor&>(m_kernel.GetExclusiveMonitor()), i);
+                static_cast<Core::DynarmicExclusiveMonitor&>(*m_exclusive_monitor), i);
         }
     } else {
         for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             m_arm_interfaces[i] = std::make_unique<Core::ArmDynarmic32>(
                 m_kernel.System(), m_kernel.IsMulticore(), this,
-                static_cast<Core::DynarmicExclusiveMonitor&>(m_kernel.GetExclusiveMonitor()), i);
+                static_cast<Core::DynarmicExclusiveMonitor&>(*m_exclusive_monitor), i);
         }
     }
 }
@@ -1305,9 +1316,10 @@ bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointT
     return true;
 }
 
-Core::Memory::Memory& KProcess::GetMemory() const {
-    // TODO: per-process memory
-    return m_kernel.System().ApplicationMemory();
+void KProcess::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
+    for (auto& manager : m_dirty_memory_managers) {
+        manager.Gather(callback);
+    }
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index 4b114e39b..b5c6867a1 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -7,6 +7,7 @@
 
 #include "core/arm/arm_interface.h"
 #include "core/file_sys/program_metadata.h"
+#include "core/gpu_dirty_memory_manager.h"
 #include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/k_address_arbiter.h"
 #include "core/hle/kernel/k_capabilities.h"
@@ -17,6 +18,7 @@
 #include "core/hle/kernel/k_system_resource.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/k_thread_local_page.h"
+#include "core/memory.h"
 
 namespace Kernel {
 
@@ -126,6 +128,9 @@ private:
 #ifdef HAS_NCE
     std::unordered_map<u64, u64> m_post_handlers{};
 #endif
+    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> m_dirty_memory_managers;
+    std::unique_ptr<Core::ExclusiveMonitor> m_exclusive_monitor;
+    Core::Memory::Memory m_memory;
 
 private:
     Result StartTermination();
@@ -502,7 +507,15 @@ public:
 
     void InitializeInterfaces();
 
-    Core::Memory::Memory& GetMemory() const;
+    Core::Memory::Memory& GetMemory() {
+        return m_memory;
+    }
+
+    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+
+    Core::ExclusiveMonitor& GetExclusiveMonitor() const {
+        return *m_exclusive_monitor;
+    }
 
 public:
     // Overridden parent functions.
@@ -539,7 +552,7 @@ private:
 
     Result InitializeHandleTable(s32 size) {
         // Try to initialize the handle table.
-        R_TRY(m_handle_table.Initialize(size));
+        R_TRY(m_handle_table.Initialize(this, size));
 
         // We succeeded, so note that we did.
         m_is_handle_table_initialized = true;
diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp
index e33a88e24..f6ca3dc48 100644
--- a/src/core/hle/kernel/k_server_session.cpp
+++ b/src/core/hle/kernel/k_server_session.cpp
@@ -8,6 +8,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/scope_exit.h"
+#include "common/scratch_buffer.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/k_client_port.h"
@@ -29,12 +30,138 @@ namespace Kernel {
 
 namespace {
 
+constexpr inline size_t PointerTransferBufferAlignment = 0x10;
+constexpr inline size_t ReceiveListDataSize =
+    MessageBuffer::MessageHeader::ReceiveListCountType_CountMax *
+    MessageBuffer::ReceiveListEntry::GetDataSize() / sizeof(u32);
+
+using ThreadQueueImplForKServerSessionRequest = KThreadQueue;
+
+class ReceiveList {
+public:
+    static constexpr int GetEntryCount(const MessageBuffer::MessageHeader& header) {
+        const auto count = header.GetReceiveListCount();
+        switch (count) {
+        case MessageBuffer::MessageHeader::ReceiveListCountType_None:
+            return 0;
+        case MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer:
+            return 0;
+        case MessageBuffer::MessageHeader::ReceiveListCountType_ToSingleBuffer:
+            return 1;
+        default:
+            return count - MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset;
+        }
+    }
+
+    explicit ReceiveList(const u32* dst_msg, uint64_t dst_address,
+                         KProcessPageTable& dst_page_table,
+                         const MessageBuffer::MessageHeader& dst_header,
+                         const MessageBuffer::SpecialHeader& dst_special_header, size_t msg_size,
+                         size_t out_offset, s32 dst_recv_list_idx, bool is_tls) {
+        m_recv_list_count = dst_header.GetReceiveListCount();
+        m_msg_buffer_end = dst_address + sizeof(u32) * out_offset;
+        m_msg_buffer_space_end = dst_address + msg_size;
+
+        // NOTE: Nintendo calculates the receive list index here using the special header.
+        // We pre-calculate it in the caller, and pass it as a parameter.
+        (void)dst_special_header;
+
+        const u32* recv_list = dst_msg + dst_recv_list_idx;
+        const auto entry_count = GetEntryCount(dst_header);
+
+        if (is_tls) {
+            // Messages from TLS to TLS are contained within one page.
+            std::memcpy(m_data.data(), recv_list,
+                        entry_count * MessageBuffer::ReceiveListEntry::GetDataSize());
+        } else {
+            // If any buffer is not from TLS, perform a normal read instead.
+            uint64_t cur_addr = dst_address + dst_recv_list_idx * sizeof(u32);
+            dst_page_table.GetMemory().ReadBlock(
+                cur_addr, m_data.data(),
+                entry_count * MessageBuffer::ReceiveListEntry::GetDataSize());
+        }
+    }
+
+    bool IsIndex() const {
+        return m_recv_list_count >
+               static_cast<s32>(MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset);
+    }
+
+    bool IsToMessageBuffer() const {
+        return m_recv_list_count ==
+               MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer;
+    }
+
+    // Chooses the destination address for a `size`-byte transfer; `out` is 0 on failure.
+    // `key` is a running byte offset in the buffer modes and an entry index otherwise.
+    void GetBuffer(uint64_t& out, size_t size, int& key) const {
+        switch (m_recv_list_count) {
+        case MessageBuffer::MessageHeader::ReceiveListCountType_None: {
+            out = 0;
+            break;
+        }
+        case MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer: {
+            const uint64_t buf =
+                Common::AlignUp(m_msg_buffer_end + key, PointerTransferBufferAlignment);
+
+            if ((buf < buf + size) && (buf + size <= m_msg_buffer_space_end)) {
+                out = buf;
+                key = static_cast<int>(buf + size - m_msg_buffer_end);
+            } else {
+                out = 0;
+            }
+            break;
+        }
+        case MessageBuffer::MessageHeader::ReceiveListCountType_ToSingleBuffer: {
+            const MessageBuffer::ReceiveListEntry entry(m_data[0], m_data[1]);
+            const uint64_t buf =
+                Common::AlignUp(entry.GetAddress() + key, PointerTransferBufferAlignment);
+
+            const uint64_t entry_addr = entry.GetAddress();
+            const size_t entry_size = entry.GetSize();
+
+            if ((buf < buf + size) && (entry_addr < entry_addr + entry_size) &&
+                (buf + size <= entry_addr + entry_size)) {
+                out = buf;
+                key = static_cast<int>(buf + size - entry_addr);
+            } else {
+                out = 0;
+            }
+            break;
+        }
+        default: {
+            // Fail unless the indexed entry exists, does not wrap, and is large enough.
+            out = 0;
+            if (key < m_recv_list_count -
+                          static_cast<s32>(
+                              MessageBuffer::MessageHeader::ReceiveListCountType_CountOffset)) {
+                const MessageBuffer::ReceiveListEntry entry(m_data[2 * key + 0],
+                                                            m_data[2 * key + 1]);
+                const uint64_t entry_addr = entry.GetAddress();
+                const size_t entry_size = entry.GetSize();
+                if ((entry_addr < entry_addr + entry_size) && (entry_size >= size)) {
+                    out = entry_addr;
+                }
+            }
+            break;
+        }
+        }
+    }
+
+private:
+    std::array<u32, ReceiveListDataSize> m_data;
+    s32 m_recv_list_count;
+    uint64_t m_msg_buffer_end;
+    uint64_t m_msg_buffer_space_end;
+};
+
 template <bool MoveHandleAllowed>
-Result ProcessMessageSpecialData(KProcess& dst_process, KProcess& src_process, KThread& src_thread,
-                                 MessageBuffer& dst_msg, const MessageBuffer& src_msg,
-                                 MessageBuffer::SpecialHeader& src_special_header) {
+Result ProcessMessageSpecialData(s32& offset, KProcess& dst_process, KProcess& src_process,
+                                 KThread& src_thread, const MessageBuffer& dst_msg,
+                                 const MessageBuffer& src_msg,
+                                 const MessageBuffer::SpecialHeader& src_special_header) {
     // Copy the special header to the destination.
-    s32 offset = dst_msg.Set(src_special_header);
+    offset = dst_msg.Set(src_special_header);
 
     // Copy the process ID.
     if (src_special_header.GetHasProcessId()) {
@@ -110,6 +237,102 @@ Result ProcessMessageSpecialData(KProcess& dst_process, KProcess& src_process, K
     R_RETURN(result);
 }
 
+Result ProcessReceiveMessagePointerDescriptors(int& offset, int& pointer_key,
+                                               KProcessPageTable& dst_page_table,
+                                               KProcessPageTable& src_page_table,
+                                               const MessageBuffer& dst_msg,
+                                               const MessageBuffer& src_msg,
+                                               const ReceiveList& dst_recv_list, bool dst_user) {
+    // Get the offset at the start of processing.
+    const int cur_offset = offset;
+
+    // Get the pointer desc.
+    MessageBuffer::PointerDescriptor src_desc(src_msg, cur_offset);
+    offset += static_cast<int>(MessageBuffer::PointerDescriptor::GetDataSize() / sizeof(u32));
+
+    // Extract address/size.
+    const uint64_t src_pointer = src_desc.GetAddress();
+    const size_t recv_size = src_desc.GetSize();
+    uint64_t recv_pointer = 0;
+
+    // Process the buffer, if it has a size.
+    if (recv_size > 0) {
+        // If using indexing, set index.
+        if (dst_recv_list.IsIndex()) {
+            pointer_key = src_desc.GetIndex();
+        }
+
+        // Get the buffer.
+        dst_recv_list.GetBuffer(recv_pointer, recv_size, pointer_key);
+        R_UNLESS(recv_pointer != 0, ResultOutOfResource);
+
+        // Perform the pointer data copy.
+        if (dst_user) {
+            R_TRY(src_page_table.CopyMemoryFromHeapToHeapWithoutCheckDestination(
+                dst_page_table, recv_pointer, recv_size, KMemoryState::FlagReferenceCounted,
+                KMemoryState::FlagReferenceCounted,
+                KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite,
+                KMemoryAttribute::Uncached | KMemoryAttribute::Locked, KMemoryAttribute::Locked,
+                src_pointer, KMemoryState::FlagLinearMapped, KMemoryState::FlagLinearMapped,
+                KMemoryPermission::UserRead, KMemoryAttribute::Uncached, KMemoryAttribute::None));
+        } else {
+            R_TRY(src_page_table.CopyMemoryFromLinearToUser(
+                recv_pointer, recv_size, src_pointer, KMemoryState::FlagLinearMapped,
+                KMemoryState::FlagLinearMapped, KMemoryPermission::UserRead,
+                KMemoryAttribute::Uncached, KMemoryAttribute::None));
+        }
+    }
+
+    // Set the output descriptor.
+    dst_msg.Set(cur_offset, MessageBuffer::PointerDescriptor(reinterpret_cast<void*>(recv_pointer),
+                                                             recv_size, src_desc.GetIndex()));
+
+    R_SUCCEED();
+}
+
+constexpr Result GetMapAliasMemoryState(KMemoryState& out,
+                                        MessageBuffer::MapAliasDescriptor::Attribute attr) {
+    switch (attr) {
+    case MessageBuffer::MapAliasDescriptor::Attribute::Ipc:
+        out = KMemoryState::Ipc;
+        break;
+    case MessageBuffer::MapAliasDescriptor::Attribute::NonSecureIpc:
+        out = KMemoryState::NonSecureIpc;
+        break;
+    case MessageBuffer::MapAliasDescriptor::Attribute::NonDeviceIpc:
+        out = KMemoryState::NonDeviceIpc;
+        break;
+    default:
+        R_THROW(ResultInvalidCombination);
+    }
+
+    R_SUCCEED();
+}
+
+constexpr Result GetMapAliasTestStateAndAttributeMask(KMemoryState& out_state,
+                                                      KMemoryAttribute& out_attr_mask,
+                                                      KMemoryState state) {
+    switch (state) {
+    case KMemoryState::Ipc:
+        out_state = KMemoryState::FlagCanUseIpc;
+        out_attr_mask =
+            KMemoryAttribute::Uncached | KMemoryAttribute::DeviceShared | KMemoryAttribute::Locked;
+        break;
+    case KMemoryState::NonSecureIpc:
+        out_state = KMemoryState::FlagCanUseNonSecureIpc;
+        out_attr_mask = KMemoryAttribute::Uncached | KMemoryAttribute::Locked;
+        break;
+    case KMemoryState::NonDeviceIpc:
+        out_state = KMemoryState::FlagCanUseNonDeviceIpc;
+        out_attr_mask = KMemoryAttribute::Uncached | KMemoryAttribute::Locked;
+        break;
+    default:
+        R_THROW(ResultInvalidCombination);
+    }
+
+    R_SUCCEED();
+}
+
 void CleanupSpecialData(KProcess& dst_process, u32* dst_msg_ptr, size_t dst_buffer_size) {
     // Parse the message.
     const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size);
@@ -144,166 +367,856 @@ void CleanupSpecialData(KProcess& dst_process, u32* dst_msg_ptr, size_t dst_buff
     }
 }
 
-} // namespace
+Result CleanupServerHandles(KernelCore& kernel, uint64_t message, size_t buffer_size,
+                            KPhysicalAddress message_paddr) {
+    // The server is assumed to be the current thread.
+    KThread& thread = GetCurrentThread(kernel);
 
-using ThreadQueueImplForKServerSessionRequest = KThreadQueue;
+    // Get the linear message pointer.
+    u32* msg_ptr;
+    if (message) {
+        msg_ptr = kernel.System().DeviceMemory().GetPointer<u32>(message_paddr);
+    } else {
+        msg_ptr = GetCurrentMemory(kernel).GetPointer<u32>(thread.GetTlsAddress());
+        buffer_size = MessageBufferSize;
+        message = GetInteger(thread.GetTlsAddress());
+    }
 
-KServerSession::KServerSession(KernelCore& kernel)
-    : KSynchronizationObject{kernel}, m_lock{m_kernel} {}
+    // Parse the message.
+    const MessageBuffer msg(msg_ptr, buffer_size);
+    const MessageBuffer::MessageHeader header(msg);
+    const MessageBuffer::SpecialHeader special_header(msg, header);
 
-KServerSession::~KServerSession() = default;
+    // Check that the size is big enough.
+    R_UNLESS(MessageBuffer::GetMessageBufferSize(header, special_header) <= buffer_size,
+             ResultInvalidCombination);
+
+    // If there's a special header, there may be move handles we need to close.
+    if (header.GetHasSpecialHeader()) {
+        // Determine the offset to the start of handles.
+        auto offset = msg.GetSpecialDataIndex(header, special_header);
+        if (special_header.GetHasProcessId()) {
+            offset += static_cast<int>(sizeof(u64) / sizeof(u32));
+        }
+        if (auto copy_count = special_header.GetCopyHandleCount(); copy_count > 0) {
+            offset += static_cast<int>((sizeof(Svc::Handle) * copy_count) / sizeof(u32));
+        }
 
-void KServerSession::Destroy() {
-    m_parent->OnServerClosed();
+        // Get the handle table.
+        auto& handle_table = thread.GetOwnerProcess()->GetHandleTable();
 
-    this->CleanupRequests();
+        // Close the handles.
+        for (auto i = 0; i < special_header.GetMoveHandleCount(); ++i) {
+            handle_table.Remove(msg.GetHandle(offset));
+            offset += static_cast<int>(sizeof(Svc::Handle) / sizeof(u32));
+        }
+    }
 
-    m_parent->Close();
+    R_SUCCEED();
 }
 
-void KServerSession::OnClientClosed() {
-    KScopedLightLock lk{m_lock};
+Result CleanupServerMap(KSessionRequest* request, KProcess* server_process) {
+    // If there's no server process, there's nothing to clean up.
+    R_SUCCEED_IF(server_process == nullptr);
 
-    // Handle any pending requests.
-    KSessionRequest* prev_request = nullptr;
-    while (true) {
-        // Declare variables for processing the request.
-        KSessionRequest* request = nullptr;
-        KEvent* event = nullptr;
-        KThread* thread = nullptr;
-        bool cur_request = false;
-        bool terminate = false;
+    // Get the page table.
+    auto& server_page_table = server_process->GetPageTable();
 
-        // Get the next request.
-        {
-            KScopedSchedulerLock sl{m_kernel};
+    // Cleanup Send mappings.
+    for (size_t i = 0; i < request->GetSendCount(); ++i) {
+        R_TRY(server_page_table.CleanupForIpcServer(request->GetSendServerAddress(i),
+                                                    request->GetSendSize(i),
+                                                    request->GetSendMemoryState(i)));
+    }
 
-            if (m_current_request != nullptr && m_current_request != prev_request) {
-                // Set the request, open a reference as we process it.
-                request = m_current_request;
-                request->Open();
-                cur_request = true;
+    // Cleanup Receive mappings.
+    for (size_t i = 0; i < request->GetReceiveCount(); ++i) {
+        R_TRY(server_page_table.CleanupForIpcServer(request->GetReceiveServerAddress(i),
+                                                    request->GetReceiveSize(i),
+                                                    request->GetReceiveMemoryState(i)));
+    }
 
-                // Get thread and event for the request.
-                thread = request->GetThread();
-                event = request->GetEvent();
+    // Cleanup Exchange mappings.
+    for (size_t i = 0; i < request->GetExchangeCount(); ++i) {
+        R_TRY(server_page_table.CleanupForIpcServer(request->GetExchangeServerAddress(i),
+                                                    request->GetExchangeSize(i),
+                                                    request->GetExchangeMemoryState(i)));
+    }
 
-                // If the thread is terminating, handle that.
-                if (thread->IsTerminationRequested()) {
-                    request->ClearThread();
-                    request->ClearEvent();
-                    terminate = true;
-                }
+    R_SUCCEED();
+}
 
-                prev_request = request;
-            } else if (!m_request_list.empty()) {
-                // Pop the request from the front of the list.
-                request = std::addressof(m_request_list.front());
-                m_request_list.pop_front();
+Result CleanupClientMap(KSessionRequest* request, KProcessPageTable* client_page_table) {
+    // If there's no client page table, there's nothing to clean up.
+    R_SUCCEED_IF(client_page_table == nullptr);
 
-                // Get thread and event for the request.
-                thread = request->GetThread();
-                event = request->GetEvent();
-            }
+    // Cleanup Send mappings.
+    for (size_t i = 0; i < request->GetSendCount(); ++i) {
+        R_TRY(client_page_table->CleanupForIpcClient(request->GetSendClientAddress(i),
+                                                     request->GetSendSize(i),
+                                                     request->GetSendMemoryState(i)));
+    }
+
+    // Cleanup Receive mappings.
+    for (size_t i = 0; i < request->GetReceiveCount(); ++i) {
+        R_TRY(client_page_table->CleanupForIpcClient(request->GetReceiveClientAddress(i),
+                                                     request->GetReceiveSize(i),
+                                                     request->GetReceiveMemoryState(i)));
+    }
+
+    // Cleanup Exchange mappings.
+    for (size_t i = 0; i < request->GetExchangeCount(); ++i) {
+        R_TRY(client_page_table->CleanupForIpcClient(request->GetExchangeClientAddress(i),
+                                                     request->GetExchangeSize(i),
+                                                     request->GetExchangeMemoryState(i)));
+    }
+
+    R_SUCCEED();
+}
+
+Result CleanupMap(KSessionRequest* request, KProcess* server_process,
+                  KProcessPageTable* client_page_table) {
+    // Cleanup the server map.
+    R_TRY(CleanupServerMap(request, server_process));
+
+    // Cleanup the client map.
+    R_TRY(CleanupClientMap(request, client_page_table));
+
+    R_SUCCEED();
+}
+
+Result ProcessReceiveMessageMapAliasDescriptors(int& offset, KProcessPageTable& dst_page_table,
+                                                KProcessPageTable& src_page_table,
+                                                const MessageBuffer& dst_msg,
+                                                const MessageBuffer& src_msg,
+                                                KSessionRequest* request, KMemoryPermission perm,
+                                                bool send) {
+    // Get the offset at the start of processing.
+    const int cur_offset = offset;
+
+    // Get the map alias descriptor.
+    MessageBuffer::MapAliasDescriptor src_desc(src_msg, cur_offset);
+    offset += static_cast<int>(MessageBuffer::MapAliasDescriptor::GetDataSize() / sizeof(u32));
+
+    // Extract address/size.
+    const KProcessAddress src_address = src_desc.GetAddress();
+    const size_t size = src_desc.GetSize();
+    KProcessAddress dst_address = 0;
+
+    // Determine the result memory state.
+    KMemoryState dst_state;
+    R_TRY(GetMapAliasMemoryState(dst_state, src_desc.GetAttribute()));
+
+    // Process the buffer, if it has a size.
+    if (size > 0) {
+        // Set up the source pages for ipc.
+        R_TRY(dst_page_table.SetupForIpc(std::addressof(dst_address), size, src_address,
+                                         src_page_table, perm, dst_state, send));
+
+        // Ensure that we clean up on failure.
+        ON_RESULT_FAILURE {
+            dst_page_table.CleanupForIpcServer(dst_address, size, dst_state);
+            src_page_table.CleanupForIpcClient(src_address, size, dst_state);
+        };
+
+        // Push the appropriate mapping.
+        if (perm == KMemoryPermission::UserRead) {
+            R_TRY(request->PushSend(src_address, dst_address, size, dst_state));
+        } else if (send) {
+            R_TRY(request->PushExchange(src_address, dst_address, size, dst_state));
+        } else {
+            R_TRY(request->PushReceive(src_address, dst_address, size, dst_state));
         }
+    }
 
-        // If there are no requests, we're done.
-        if (request == nullptr) {
-            break;
+    // Set the output descriptor.
+    dst_msg.Set(cur_offset,
+                MessageBuffer::MapAliasDescriptor(reinterpret_cast<void*>(GetInteger(dst_address)),
+                                                  size, src_desc.GetAttribute()));
+
+    R_SUCCEED();
+}
+
+Result ReceiveMessage(KernelCore& kernel, bool& recv_list_broken, uint64_t dst_message_buffer,
+                      size_t dst_buffer_size, KPhysicalAddress dst_message_paddr,
+                      KThread& src_thread, uint64_t src_message_buffer, size_t src_buffer_size,
+                      KServerSession* session, KSessionRequest* request) {
+    // Prepare variables for receive.
+    KThread& dst_thread = GetCurrentThread(kernel);
+    KProcess& dst_process = *(dst_thread.GetOwnerProcess());
+    KProcess& src_process = *(src_thread.GetOwnerProcess());
+    auto& dst_page_table = dst_process.GetPageTable();
+    auto& src_page_table = src_process.GetPageTable();
+
+    // NOTE: Session is used only for debugging, and so may go unused.
+    (void)session;
+
+    // The receive list is initially not broken.
+    recv_list_broken = false;
+
+    // Set the server process for the request.
+    request->SetServerProcess(std::addressof(dst_process));
+
+    // Determine the message buffers.
+    u32 *dst_msg_ptr, *src_msg_ptr;
+    bool dst_user, src_user;
+
+    if (dst_message_buffer) {
+        dst_msg_ptr = kernel.System().DeviceMemory().GetPointer<u32>(dst_message_paddr);
+        dst_user = true;
+    } else {
+        dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_thread.GetTlsAddress());
+        dst_buffer_size = MessageBufferSize;
+        dst_message_buffer = GetInteger(dst_thread.GetTlsAddress());
+        dst_user = false;
+    }
+
+    if (src_message_buffer) {
+        // NOTE: Nintendo does not check the result of this GetPhysicalAddress call.
+        src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_message_buffer);
+        src_user = true;
+    } else {
+        src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_thread.GetTlsAddress());
+        src_buffer_size = MessageBufferSize;
+        src_message_buffer = GetInteger(src_thread.GetTlsAddress());
+        src_user = false;
+    }
+
+    // Parse the headers.
+    const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size);
+    const MessageBuffer src_msg(src_msg_ptr, src_buffer_size);
+    const MessageBuffer::MessageHeader dst_header(dst_msg);
+    const MessageBuffer::MessageHeader src_header(src_msg);
+    const MessageBuffer::SpecialHeader dst_special_header(dst_msg, dst_header);
+    const MessageBuffer::SpecialHeader src_special_header(src_msg, src_header);
+
+    // Get the end of the source message.
+    const size_t src_end_offset =
+        MessageBuffer::GetRawDataIndex(src_header, src_special_header) + src_header.GetRawCount();
+
+    // Ensure that the headers fit.
+    R_UNLESS(MessageBuffer::GetMessageBufferSize(dst_header, dst_special_header) <= dst_buffer_size,
+             ResultInvalidCombination);
+    R_UNLESS(MessageBuffer::GetMessageBufferSize(src_header, src_special_header) <= src_buffer_size,
+             ResultInvalidCombination);
+
+    // Ensure the receive list offset is after the end of raw data.
+    if (dst_header.GetReceiveListOffset()) {
+        R_UNLESS(dst_header.GetReceiveListOffset() >=
+                     MessageBuffer::GetRawDataIndex(dst_header, dst_special_header) +
+                         dst_header.GetRawCount(),
+                 ResultInvalidCombination);
+    }
+
+    // Ensure that the destination buffer is big enough to receive the source.
+    R_UNLESS(dst_buffer_size >= src_end_offset * sizeof(u32), ResultMessageTooLarge);
+
+    // Get the receive list.
+    const s32 dst_recv_list_idx =
+        MessageBuffer::GetReceiveListIndex(dst_header, dst_special_header);
+    ReceiveList dst_recv_list(dst_msg_ptr, dst_message_buffer, dst_page_table, dst_header,
+                              dst_special_header, dst_buffer_size, src_end_offset,
+                              dst_recv_list_idx, !dst_user);
+
+    // Ensure that the source special header isn't invalid.
+    const bool src_has_special_header = src_header.GetHasSpecialHeader();
+    if (src_has_special_header) {
+        // Sending move handles from client -> server is not allowed.
+        R_UNLESS(src_special_header.GetMoveHandleCount() == 0, ResultInvalidCombination);
+    }
+
+    // Prepare for further processing.
+    int pointer_key = 0;
+    int offset = dst_msg.Set(src_header);
+
+    // Set up a guard to make sure that we end up in a clean state on error.
+    ON_RESULT_FAILURE {
+        // Cleanup mappings.
+        CleanupMap(request, std::addressof(dst_process), std::addressof(src_page_table));
+
+        // Cleanup special data.
+        if (src_header.GetHasSpecialHeader()) {
+            CleanupSpecialData(dst_process, dst_msg_ptr, dst_buffer_size);
         }
 
-        // All requests must have threads.
-        ASSERT(thread != nullptr);
+        // Cleanup the header if the receive list isn't broken.
+        if (!recv_list_broken) {
+            dst_msg.Set(dst_header);
+            if (dst_header.GetHasSpecialHeader()) {
+                dst_msg.Set(dst_special_header);
+            }
+        }
+    };
+
+    // Process any special data.
+    if (src_header.GetHasSpecialHeader()) {
+        // After we process, make sure we track whether the receive list is broken.
+        SCOPE_EXIT({
+            if (offset > dst_recv_list_idx) {
+                recv_list_broken = true;
+            }
+        });
 
-        // Ensure that we close the request when done.
-        SCOPE_EXIT({ request->Close(); });
+        // Process special data.
+        R_TRY(ProcessMessageSpecialData<false>(offset, dst_process, src_process, src_thread,
+                                               dst_msg, src_msg, src_special_header));
+    }
 
-        // If we're terminating, close a reference to the thread and event.
-        if (terminate) {
-            thread->Close();
-            if (event != nullptr) {
-                event->Close();
+    // Process any pointer buffers.
+    for (auto i = 0; i < src_header.GetPointerCount(); ++i) {
+        // After we process, make sure we track whether the receive list is broken.
+        SCOPE_EXIT({
+            if (offset > dst_recv_list_idx) {
+                recv_list_broken = true;
+            }
+        });
+
+        R_TRY(ProcessReceiveMessagePointerDescriptors(
+            offset, pointer_key, dst_page_table, src_page_table, dst_msg, src_msg, dst_recv_list,
+            dst_user && dst_header.GetReceiveListCount() ==
+                            MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer));
+    }
+
+    // Process any map alias buffers.
+    for (auto i = 0; i < src_header.GetMapAliasCount(); ++i) {
+        // After we process, make sure we track whether the receive list is broken.
+        SCOPE_EXIT({
+            if (offset > dst_recv_list_idx) {
+                recv_list_broken = true;
+            }
+        });
+
+        // We process in order send, recv, exch. Buffers after send (recv/exch) are ReadWrite.
+        const KMemoryPermission perm = (i >= src_header.GetSendCount())
+                                           ? KMemoryPermission::UserReadWrite
+                                           : KMemoryPermission::UserRead;
+
+        // Buffer is send if it is send or exch.
+        const bool send = (i < src_header.GetSendCount()) ||
+                          (i >= src_header.GetSendCount() + src_header.GetReceiveCount());
+
+        R_TRY(ProcessReceiveMessageMapAliasDescriptors(offset, dst_page_table, src_page_table,
+                                                       dst_msg, src_msg, request, perm, send));
+    }
+
+    // Process any raw data.
+    if (const auto raw_count = src_header.GetRawCount(); raw_count != 0) {
+        // After we process, make sure we track whether the receive list is broken.
+        SCOPE_EXIT({
+            if (offset + raw_count > dst_recv_list_idx) {
+                recv_list_broken = true;
             }
+        });
+
+        // Get the offset and size.
+        const size_t offset_words = offset * sizeof(u32);
+        const size_t raw_size = raw_count * sizeof(u32);
+
+        if (!dst_user && !src_user) {
+            // Fast case is TLS -> TLS, do raw memcpy if we can.
+            std::memcpy(dst_msg_ptr + offset, src_msg_ptr + offset, raw_size);
+        } else if (dst_user) {
+            // Determine how much fast size we can copy.
+            const size_t max_fast_size = std::min<size_t>(offset_words + raw_size, PageSize);
+            const size_t fast_size = max_fast_size - offset_words;
+
+            // Determine source state; if user buffer, we require heap, and otherwise only linear
+            // mapped (to enable tls use).
+            const auto src_state =
+                src_user ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped;
+
+            // Determine the source permission. User buffer should be unmapped + read, TLS should be
+            // user readable.
+            const KMemoryPermission src_perm = static_cast<KMemoryPermission>(
+                src_user ? KMemoryPermission::NotMapped | KMemoryPermission::KernelRead
+                         : KMemoryPermission::UserRead);
+
+            // Perform the fast part of the copy.
+            R_TRY(src_page_table.CopyMemoryFromLinearToKernel(
+                dst_msg_ptr + offset, fast_size, src_message_buffer + offset_words, src_state,
+                src_state, src_perm, KMemoryAttribute::Uncached, KMemoryAttribute::None));
+
+            // If the fast part of the copy didn't get everything, perform the slow part of the
+            // copy.
+            if (fast_size < raw_size) {
+                R_TRY(src_page_table.CopyMemoryFromHeapToHeap(
+                    dst_page_table, dst_message_buffer + max_fast_size, raw_size - fast_size,
+                    KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted,
+                    KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite,
+                    KMemoryAttribute::Uncached | KMemoryAttribute::Locked, KMemoryAttribute::Locked,
+                    src_message_buffer + max_fast_size, src_state, src_state, src_perm,
+                    KMemoryAttribute::Uncached, KMemoryAttribute::None));
+            }
+        } else /* if (src_user) */ {
+            // The source is a user buffer, so it should be unmapped + readable.
+            constexpr KMemoryPermission SourcePermission = static_cast<KMemoryPermission>(
+                KMemoryPermission::NotMapped | KMemoryPermission::KernelRead);
+
+            // Copy the memory.
+            R_TRY(src_page_table.CopyMemoryFromLinearToUser(
+                dst_message_buffer + offset_words, raw_size, src_message_buffer + offset_words,
+                KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted,
+                SourcePermission, KMemoryAttribute::Uncached, KMemoryAttribute::None));
         }
+    }
 
-        // If we need to, reply.
-        if (event != nullptr && !cur_request) {
-            // There must be no mappings.
-            ASSERT(request->GetSendCount() == 0);
-            ASSERT(request->GetReceiveCount() == 0);
-            ASSERT(request->GetExchangeCount() == 0);
+    // We succeeded!
+    R_SUCCEED();
+}
 
-            // // Get the process and page table.
-            // KProcess *client_process = thread->GetOwnerProcess();
-            // auto& client_pt = client_process->GetPageTable();
+Result ProcessSendMessageReceiveMapping(KProcessPageTable& src_page_table,
+                                        KProcessPageTable& dst_page_table,
+                                        KProcessAddress client_address,
+                                        KProcessAddress server_address, size_t size,
+                                        KMemoryState src_state) { // src_page_table is unused.
+    // A zero-sized buffer needs no processing; succeed immediately.
+    R_SUCCEED_IF(size == 0);
+
+    // Derive the memory state and attribute mask to test from src_state (this may fail).
+    KMemoryState test_state;
+    KMemoryAttribute test_attr_mask;
+    R_TRY(GetMapAliasTestStateAndAttributeMask(test_state, test_attr_mask, src_state));
+
+    // Determine buffer extents: aligned_* rounds outward to pages, mapping_* rounds inward.
+    KProcessAddress aligned_dst_start = Common::AlignDown(GetInteger(client_address), PageSize);
+    KProcessAddress aligned_dst_end = Common::AlignUp(GetInteger(client_address) + size, PageSize);
+    KProcessAddress mapping_dst_start = Common::AlignUp(GetInteger(client_address), PageSize);
+    KProcessAddress mapping_dst_end =
+        Common::AlignDown(GetInteger(client_address) + size, PageSize);
+
+    KProcessAddress mapping_src_end =
+        Common::AlignDown(GetInteger(server_address) + size, PageSize);
+
+    // Unaligned start: handle the bytes before the first page boundary (capped at size).
+    if (aligned_dst_start != mapping_dst_start) {
+        ASSERT(client_address < mapping_dst_start);
+        const size_t copy_size = std::min<size_t>(size, mapping_dst_start - client_address);
+        R_TRY(dst_page_table.CopyMemoryFromUserToLinear(
+            client_address, copy_size, test_state, test_state, KMemoryPermission::UserReadWrite,
+            test_attr_mask, KMemoryAttribute::None, server_address));
+    }
 
-            // // Reply to the request.
-            // ReplyAsyncError(client_process, request->GetAddress(), request->GetSize(),
-            //                 ResultSessionClosed);
+    // Unaligned end: handle the bytes after the last page boundary, unless handled above.
+    if (mapping_dst_end < aligned_dst_end &&
+        (aligned_dst_start == mapping_dst_start || aligned_dst_start < mapping_dst_end)) {
+        const size_t copy_size = client_address + size - mapping_dst_end;
+        R_TRY(dst_page_table.CopyMemoryFromUserToLinear(
+            mapping_dst_end, copy_size, test_state, test_state, KMemoryPermission::UserReadWrite,
+            test_attr_mask, KMemoryAttribute::None, mapping_src_end));
+    }
 
-            // // Unlock the buffer.
-            // // NOTE: Nintendo does not check the result of this.
-            // client_pt.UnlockForIpcUserBuffer(request->GetAddress(), request->GetSize());
+    R_SUCCEED();
+}
 
-            // Signal the event.
-            event->Signal();
+Result ProcessSendMessagePointerDescriptors(int& offset, int& pointer_key,
+                                            KProcessPageTable& src_page_table,
+                                            KProcessPageTable& dst_page_table,
+                                            const MessageBuffer& dst_msg,
+                                            const MessageBuffer& src_msg,
+                                            const ReceiveList& dst_recv_list, bool dst_user) {
+    // Remember where this descriptor starts; offset itself is advanced past it below.
+    const int cur_offset = offset;
+
+    // Read the source pointer descriptor and step offset over it (counted in u32 words).
+    MessageBuffer::PointerDescriptor src_desc(src_msg, cur_offset);
+    offset += static_cast<int>(MessageBuffer::PointerDescriptor::GetDataSize() / sizeof(u32));
+
+    // Extract address/size; recv_pointer stays 0 unless a buffer is obtained below.
+    const uint64_t src_pointer = src_desc.GetAddress();
+    const size_t recv_size = src_desc.GetSize();
+    uint64_t recv_pointer = 0;
+
+    // Process the buffer, if it has a size; empty descriptors skip lookup and copy.
+    if (recv_size > 0) {
+        // If the receive list uses indexing, take the key from the descriptor's index.
+        if (dst_recv_list.IsIndex()) {
+            pointer_key = src_desc.GetIndex();
         }
+
+        // Get the buffer; a zero recv_pointer afterwards is reported as ResultOutOfResource.
+        dst_recv_list.GetBuffer(recv_pointer, recv_size, pointer_key);
+        R_UNLESS(recv_pointer != 0, ResultOutOfResource);
+
+        // Perform the pointer data copy, choosing state/permission by destination kind.
+        const bool dst_heap = dst_user && dst_recv_list.IsToMessageBuffer();
+        const auto dst_state =
+            dst_heap ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped;
+        const KMemoryPermission dst_perm =
+            dst_heap ? KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite
+                     : KMemoryPermission::UserReadWrite;
+        R_TRY(dst_page_table.CopyMemoryFromUserToLinear(
+            recv_pointer, recv_size, dst_state, dst_state, dst_perm, KMemoryAttribute::Uncached,
+            KMemoryAttribute::None, src_pointer));
     }
 
-    // Notify.
-    this->NotifyAvailable(ResultSessionClosed);
+    // Set the output descriptor at the original offset, carrying the receive address.
+    dst_msg.Set(cur_offset, MessageBuffer::PointerDescriptor(reinterpret_cast<void*>(recv_pointer),
+                                                             recv_size, src_desc.GetIndex()));
+
+    R_SUCCEED();
 }
 
-bool KServerSession::IsSignaled() const {
-    ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel));
+Result SendMessage(KernelCore& kernel, uint64_t src_message_buffer, size_t src_buffer_size,
+                   KPhysicalAddress src_message_paddr, KThread& dst_thread,
+                   uint64_t dst_message_buffer, size_t dst_buffer_size, KServerSession* session,
+                   KSessionRequest* request) {
+    // Prepare variables for send: the current thread is the source, dst_thread the receiver.
+    KThread& src_thread = GetCurrentThread(kernel);
+    KProcess& dst_process = *(dst_thread.GetOwnerProcess());
+    KProcess& src_process = *(src_thread.GetOwnerProcess());
+    auto& dst_page_table = dst_process.GetPageTable();
+    auto& src_page_table = src_process.GetPageTable();
+
+    // NOTE: Session is used only for debugging, and so may go unused.
+    (void)session;
+
+    // Determine the message buffers; a zero address selects the thread's TLS region.
+    u32 *dst_msg_ptr, *src_msg_ptr;
+    bool dst_user, src_user;
+
+    if (dst_message_buffer) {
+        // NOTE: Nintendo does not check the result of this GetPhysicalAddress call.
+        dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_message_buffer);
+        dst_user = true;
+    } else {
+        dst_msg_ptr = dst_page_table.GetMemory().GetPointer<u32>(dst_thread.GetTlsAddress());
+        dst_buffer_size = MessageBufferSize;
+        dst_message_buffer = GetInteger(dst_thread.GetTlsAddress());
+        dst_user = false;
+    }
 
-    // If the client is closed, we're always signaled.
-    if (m_parent->IsClientClosed()) {
-        return true;
+    if (src_message_buffer) {
+        src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_message_buffer);
+        src_user = true;
+    } else {
+        src_msg_ptr = src_page_table.GetMemory().GetPointer<u32>(src_thread.GetTlsAddress());
+        src_buffer_size = MessageBufferSize;
+        src_message_buffer = GetInteger(src_thread.GetTlsAddress());
+        src_user = false;
     }
 
-    // Otherwise, we're signaled if we have a request and aren't handling one.
-    return !m_request_list.empty() && m_current_request == nullptr;
+    // Parse the headers.
+    const MessageBuffer dst_msg(dst_msg_ptr, dst_buffer_size);
+    const MessageBuffer src_msg(src_msg_ptr, src_buffer_size);
+    const MessageBuffer::MessageHeader dst_header(dst_msg);
+    const MessageBuffer::MessageHeader src_header(src_msg);
+    const MessageBuffer::SpecialHeader dst_special_header(dst_msg, dst_header);
+    const MessageBuffer::SpecialHeader src_special_header(src_msg, src_header);
+
+    // Get the end of the source message, measured in u32 words.
+    const size_t src_end_offset =
+        MessageBuffer::GetRawDataIndex(src_header, src_special_header) + src_header.GetRawCount();
+
+    // Declare variables for processing.
+    int offset = 0;
+    int pointer_key = 0;
+    bool processed_special_data = false;
+
+    // Send the message.
+    {
+        // Make sure that we end up in a clean state on error.
+        ON_RESULT_FAILURE {
+            // Cleanup special data if that stage was reached; otherwise clean up server handles.
+            if (processed_special_data) {
+                if (src_header.GetHasSpecialHeader()) {
+                    CleanupSpecialData(dst_process, dst_msg_ptr, dst_buffer_size);
+                }
+            } else {
+                CleanupServerHandles(kernel, src_user ? src_message_buffer : 0, src_buffer_size,
+                                     src_message_paddr);
+            }
+
+            // Cleanup mappings.
+            CleanupMap(request, std::addressof(src_process), std::addressof(dst_page_table));
+        };
+
+        // Ensure that the headers fit.
+        R_UNLESS(MessageBuffer::GetMessageBufferSize(src_header, src_special_header) <=
+                     src_buffer_size,
+                 ResultInvalidCombination);
+        R_UNLESS(MessageBuffer::GetMessageBufferSize(dst_header, dst_special_header) <=
+                     dst_buffer_size,
+                 ResultInvalidCombination);
+
+        // Ensure the receive list offset is after the end of raw data.
+        if (dst_header.GetReceiveListOffset()) {
+            R_UNLESS(dst_header.GetReceiveListOffset() >=
+                         MessageBuffer::GetRawDataIndex(dst_header, dst_special_header) +
+                             dst_header.GetRawCount(),
+                     ResultInvalidCombination);
+        }
+
+        // Ensure that the destination buffer is big enough to receive the source.
+        R_UNLESS(dst_buffer_size >= src_end_offset * sizeof(u32), ResultMessageTooLarge);
+
+        // Replies must have no buffers.
+        R_UNLESS(src_header.GetSendCount() == 0, ResultInvalidCombination);
+        R_UNLESS(src_header.GetReceiveCount() == 0, ResultInvalidCombination);
+        R_UNLESS(src_header.GetExchangeCount() == 0, ResultInvalidCombination);
+
+        // Get the receive list.
+        const s32 dst_recv_list_idx =
+            MessageBuffer::GetReceiveListIndex(dst_header, dst_special_header);
+        ReceiveList dst_recv_list(dst_msg_ptr, dst_message_buffer, dst_page_table, dst_header,
+                                  dst_special_header, dst_buffer_size, src_end_offset,
+                                  dst_recv_list_idx, !dst_user);
+
+        // Handle any receive buffers recorded on the request.
+        for (size_t i = 0; i < request->GetReceiveCount(); ++i) {
+            R_TRY(ProcessSendMessageReceiveMapping(
+                src_page_table, dst_page_table, request->GetReceiveClientAddress(i),
+                request->GetReceiveServerAddress(i), request->GetReceiveSize(i),
+                request->GetReceiveMemoryState(i)));
+        }
+
+        // Handle any exchange buffers recorded on the request.
+        for (size_t i = 0; i < request->GetExchangeCount(); ++i) {
+            R_TRY(ProcessSendMessageReceiveMapping(
+                src_page_table, dst_page_table, request->GetExchangeClientAddress(i),
+                request->GetExchangeServerAddress(i), request->GetExchangeSize(i),
+                request->GetExchangeMemoryState(i)));
+        }
+
+        // Set the header.
+        offset = dst_msg.Set(src_header);
+
+        // Process any special data.
+        ASSERT(GetCurrentThreadPointer(kernel) == std::addressof(src_thread));
+        processed_special_data = true;
+        if (src_header.GetHasSpecialHeader()) {
+            R_TRY(ProcessMessageSpecialData<true>(offset, dst_process, src_process, src_thread,
+                                                  dst_msg, src_msg, src_special_header));
+        }
+
+        // Process any pointer buffers.
+        for (auto i = 0; i < src_header.GetPointerCount(); ++i) {
+            R_TRY(ProcessSendMessagePointerDescriptors(
+                offset, pointer_key, src_page_table, dst_page_table, dst_msg, src_msg,
+                dst_recv_list,
+                dst_user &&
+                    dst_header.GetReceiveListCount() ==
+                        MessageBuffer::MessageHeader::ReceiveListCountType_ToMessageBuffer));
+        }
+
+        // Clear any map alias buffers by writing default-constructed descriptors.
+        for (auto i = 0; i < src_header.GetMapAliasCount(); ++i) {
+            offset = dst_msg.Set(offset, MessageBuffer::MapAliasDescriptor());
+        }
+
+        // Process any raw data.
+        if (const auto raw_count = src_header.GetRawCount(); raw_count != 0) {
+            // Get the offset and size in bytes (offset_words is a byte offset despite its name).
+            const size_t offset_words = offset * sizeof(u32);
+            const size_t raw_size = raw_count * sizeof(u32);
+
+            if (!dst_user && !src_user) {
+                // Fast case is TLS -> TLS, do raw memcpy if we can.
+                std::memcpy(dst_msg_ptr + offset, src_msg_ptr + offset, raw_size);
+            } else if (src_user) {
+                // Determine how much fast size we can copy.
+                const size_t max_fast_size = std::min<size_t>(offset_words + raw_size, PageSize);
+                const size_t fast_size = max_fast_size - offset_words;
+
+                // Determine dst state; if user buffer, we require heap, and otherwise only linear
+                // mapped (to enable tls use).
+                const auto dst_state =
+                    dst_user ? KMemoryState::FlagReferenceCounted : KMemoryState::FlagLinearMapped;
+
+                // Determine the dst permission. User buffer should be unmapped + kernel
+                // read/write, TLS should be user read/write.
+                const KMemoryPermission dst_perm =
+                    dst_user ? KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite
+                             : KMemoryPermission::UserReadWrite;
+
+                // Perform the fast part of the copy.
+                R_TRY(dst_page_table.CopyMemoryFromKernelToLinear(
+                    dst_message_buffer + offset_words, fast_size, dst_state, dst_state, dst_perm,
+                    KMemoryAttribute::Uncached, KMemoryAttribute::None, src_msg_ptr + offset));
+
+                // If the fast part of the copy didn't get everything, perform the slow part of the
+                // copy.
+                if (fast_size < raw_size) {
+                    R_TRY(dst_page_table.CopyMemoryFromHeapToHeap(
+                        dst_page_table, dst_message_buffer + max_fast_size, raw_size - fast_size,
+                        dst_state, dst_state, dst_perm, KMemoryAttribute::Uncached,
+                        KMemoryAttribute::None, src_message_buffer + max_fast_size,
+                        KMemoryState::FlagReferenceCounted, KMemoryState::FlagReferenceCounted,
+                        KMemoryPermission::NotMapped | KMemoryPermission::KernelRead,
+                        KMemoryAttribute::Uncached | KMemoryAttribute::Locked,
+                        KMemoryAttribute::Locked));
+                }
+            } else /* if (dst_user) */ {
+                // The destination is a user buffer, so it should be unmapped + kernel read/write.
+                constexpr KMemoryPermission DestinationPermission =
+                    KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite;
+
+                // Copy the memory.
+                R_TRY(dst_page_table.CopyMemoryFromUserToLinear(
+                    dst_message_buffer + offset_words, raw_size, KMemoryState::FlagReferenceCounted,
+                    KMemoryState::FlagReferenceCounted, DestinationPermission,
+                    KMemoryAttribute::Uncached, KMemoryAttribute::None,
+                    src_message_buffer + offset_words));
+            }
+        }
+    }
+
+    // Perform (and validate) any remaining cleanup.
+    R_RETURN(CleanupMap(request, std::addressof(src_process), std::addressof(dst_page_table)));
 }
 
-Result KServerSession::OnRequest(KSessionRequest* request) {
-    // Create the wait queue.
-    ThreadQueueImplForKServerSessionRequest wait_queue{m_kernel};
+void ReplyAsyncError(KProcess* to_process, uint64_t to_msg_buf, size_t to_msg_buf_size,
+                     Result result) {
+    // Resolve to_msg_buf through the target process's memory into a u32 pointer.
+    u32* to_msg = to_process->GetMemory().GetPointer<u32>(to_msg_buf);
+
+    // Wrap the buffer in a MessageBuffer and store the error via SetAsyncResult.
+    MessageBuffer msg(to_msg, to_msg_buf_size);
+    msg.SetAsyncResult(result);
+}
+
+} // namespace
+
+KServerSession::KServerSession(KernelCore& kernel)
+    : KSynchronizationObject{kernel}, m_lock{m_kernel} {} // m_lock is built from m_kernel.
+
+KServerSession::~KServerSession() = default; // Defaulted: only member/base destructors run.
+
+void KServerSession::Destroy() {
+    m_parent->OnServerClosed(); // Notify the parent first, before requests are cleaned up.
+
+    this->CleanupRequests(); // Defined elsewhere in this file.
+
+    m_parent->Close(); // Last use of m_parent in this function.
+}
+
+Result KServerSession::ReceiveRequest(uintptr_t server_message, uintptr_t server_buffer_size,
+                                      KPhysicalAddress server_message_paddr,
+                                      std::shared_ptr<Service::HLERequestContext>* out_context,
+                                      std::weak_ptr<Service::SessionRequestManager> manager) {
+    // Lock the session.
+    KScopedLightLock lk{m_lock};
+
+    // Get the request and client thread.
+    KSessionRequest* request;
+    KThread* client_thread;
 
     {
-        // Lock the scheduler.
         KScopedSchedulerLock sl{m_kernel};
 
-        // Ensure that we can handle new requests.
-        R_UNLESS(!m_parent->IsServerClosed(), ResultSessionClosed);
+        // Ensure that we can service the request.
+        R_UNLESS(!m_parent->IsClientClosed(), ResultSessionClosed);
 
-        // Check that we're not terminating.
-        R_UNLESS(!GetCurrentThread(m_kernel).IsTerminationRequested(), ResultTerminationRequested);
+        // Ensure we aren't already servicing a request.
+        R_UNLESS(m_current_request == nullptr, ResultNotFound);
 
-        // Get whether we're empty.
-        const bool was_empty = m_request_list.empty();
+        // Ensure we have a request to service.
+        R_UNLESS(!m_request_list.empty(), ResultNotFound);
 
-        // Add the request to the list.
-        request->Open();
-        m_request_list.push_back(*request);
+        // Pop the first request from the list.
+        request = std::addressof(m_request_list.front());
+        m_request_list.pop_front();
 
-        // If we were empty, signal.
-        if (was_empty) {
-            this->NotifyAvailable();
+        // Get the thread for the request; a request without one fails with SessionClosed.
+        client_thread = request->GetThread();
+        R_UNLESS(client_thread != nullptr, ResultSessionClosed);
+
+        // Open the client thread; the SCOPE_EXIT below closes it again on every exit path.
+        client_thread->Open();
+    }
+
+    SCOPE_EXIT({ client_thread->Close(); });
+
+    // Set the request as our current.
+    m_current_request = request;
+
+    // Get the client's message address and buffer size from the request.
+    uint64_t client_message = request->GetAddress();
+    size_t client_buffer_size = request->GetSize();
+    bool recv_list_broken = false;
+
+    // Receive the message.
+    Result result = ResultSuccess;
+
+    if (out_context != nullptr) {
+        // HLE request: fall back to the client's TLS when no message address was given.
+        if (!client_message) {
+            client_message = GetInteger(client_thread->GetTlsAddress());
         }
+        Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()};
+        u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(client_message))};
+        *out_context =
+            std::make_shared<Service::HLERequestContext>(m_kernel, memory, this, client_thread);
+        (*out_context)->SetSessionRequestManager(manager);
+        (*out_context)
+            ->PopulateFromIncomingCommandBuffer(*client_thread->GetOwnerProcess(), cmd_buf);
+        // We succeeded; note that m_current_request stays set on this path.
+        R_SUCCEED();
+    } else {
+        result = ReceiveMessage(m_kernel, recv_list_broken, server_message, server_buffer_size,
+                                server_message_paddr, *client_thread, client_message,
+                                client_buffer_size, this, request);
+    }
 
-        // If we have a request event, this is asynchronous, and we don't need to wait.
-        R_SUCCEED_IF(request->GetEvent() != nullptr);
+    // Handle cleanup on receive failure.
+    if (R_FAILED(result)) {
+        // Cache the result to return it to the client.
+        const Result result_for_client = result;
 
-        // This is a synchronous request, so we should wait for our request to complete.
-        GetCurrentThread(m_kernel).SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC);
-        GetCurrentThread(m_kernel).BeginWait(std::addressof(wait_queue));
+        // Clear the current request.
+        {
+            KScopedSchedulerLock sl(m_kernel);
+            ASSERT(m_current_request == request);
+            m_current_request = nullptr;
+            if (!m_request_list.empty()) {
+                this->NotifyAvailable();
+            }
+        }
+
+        // Reply to the client.
+        {
+            // After we reply, close our reference to the request.
+            SCOPE_EXIT({ request->Close(); });
+
+            // Get the event to check whether the request is async.
+            if (KEvent* event = request->GetEvent(); event != nullptr) {
+                // The client sent an async request.
+                KProcess* client = client_thread->GetOwnerProcess();
+                auto& client_pt = client->GetPageTable();
+
+                // Send the async result (result_for_client is always a failure in this branch).
+                if (R_FAILED(result_for_client)) {
+                    ReplyAsyncError(client, client_message, client_buffer_size, result_for_client);
+                }
+
+                // Unlock the client buffer.
+                // NOTE: Nintendo does not check the result of this.
+                client_pt.UnlockForIpcUserBuffer(client_message, client_buffer_size);
+
+                // Signal the event.
+                event->Signal();
+            } else {
+                // Synchronous request: end the client thread's wait with the cached error.
+                KScopedSchedulerLock sl(m_kernel);
+
+                if (!client_thread->IsTerminationRequested()) {
+                    client_thread->EndWait(result_for_client);
+                }
+            }
+        }
+
+        // Set the server result: ReceiveListBroken or NotFound, not the client's error code.
+        if (recv_list_broken) {
+            result = ResultReceiveListBroken;
+        } else {
+            result = ResultNotFound;
+        }
     }
 
-    return GetCurrentThread(m_kernel).GetWaitResult();
+    R_RETURN(result);
 }
 
-Result KServerSession::SendReply(bool is_hle) {
+Result KServerSession::SendReply(uintptr_t server_message, uintptr_t server_buffer_size,
+                                 KPhysicalAddress server_message_paddr, bool is_hle) {
     // Lock the session.
     KScopedLightLock lk{m_lock};
 
@@ -327,7 +1240,7 @@ Result KServerSession::SendReply(bool is_hle) {
     SCOPE_EXIT({ request->Close(); });
 
     // Extract relevant information from the request.
-    const uintptr_t client_message = request->GetAddress();
+    const uint64_t client_message = request->GetAddress();
     const size_t client_buffer_size = request->GetSize();
     KThread* client_thread = request->GetThread();
     KEvent* event = request->GetEvent();
@@ -342,31 +1255,28 @@ Result KServerSession::SendReply(bool is_hle) {
             // HLE servers write directly to a pointer to the thread command buffer. Therefore
             // the reply has already been written in this case.
         } else {
-            Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()};
-            KThread* server_thread = GetCurrentThreadPointer(m_kernel);
-            KProcess& src_process = *client_thread->GetOwnerProcess();
-            KProcess& dst_process = *server_thread->GetOwnerProcess();
-            UNIMPLEMENTED_IF(server_thread->GetOwnerProcess() != client_thread->GetOwnerProcess());
-
-            auto* src_msg_buffer = memory.GetPointer<u32>(server_thread->GetTlsAddress());
-            auto* dst_msg_buffer = memory.GetPointer<u32>(client_message);
-            std::memcpy(dst_msg_buffer, src_msg_buffer, client_buffer_size);
-
-            // Translate special header ad-hoc.
-            MessageBuffer src_msg(src_msg_buffer, client_buffer_size);
-            MessageBuffer::MessageHeader src_header(src_msg);
-            MessageBuffer::SpecialHeader src_special_header(src_msg, src_header);
-            if (src_header.GetHasSpecialHeader()) {
-                MessageBuffer dst_msg(dst_msg_buffer, client_buffer_size);
-                result = ProcessMessageSpecialData<true>(dst_process, src_process, *server_thread,
-                                                         dst_msg, src_msg, src_special_header);
-                if (R_FAILED(result)) {
-                    CleanupSpecialData(dst_process, dst_msg_buffer, client_buffer_size);
-                }
-            }
+            result = SendMessage(m_kernel, server_message, server_buffer_size, server_message_paddr,
+                                 *client_thread, client_message, client_buffer_size, this, request);
+        }
+    } else if (!is_hle) {
+        // Otherwise, we'll need to do some cleanup.
+        KProcess* server_process = request->GetServerProcess();
+        KProcess* client_process =
+            (client_thread != nullptr) ? client_thread->GetOwnerProcess() : nullptr;
+        KProcessPageTable* client_page_table =
+            (client_process != nullptr) ? std::addressof(client_process->GetPageTable()) : nullptr;
+
+        // Cleanup server handles.
+        result = CleanupServerHandles(m_kernel, server_message, server_buffer_size,
+                                      server_message_paddr);
+
+        // Cleanup mappings.
+        Result cleanup_map_result = CleanupMap(request, server_process, client_page_table);
+
+        // If we successfully cleaned up handles, use the map cleanup result as our result.
+        if (R_SUCCEEDED(result)) {
+            result = cleanup_map_result;
         }
-    } else {
-        result = ResultSessionClosed;
     }
 
     // Select a result for the client.
@@ -381,19 +1291,18 @@ Result KServerSession::SendReply(bool is_hle) {
     // If there's a client thread, update it.
     if (client_thread != nullptr) {
         if (event != nullptr) {
-            // // Get the client process/page table.
-            // KProcess *client_process             = client_thread->GetOwnerProcess();
-            // KProcessPageTable *client_page_table = std::addressof(client_process->PageTable());
+            // Get the client process/page table.
+            KProcess* client_process = client_thread->GetOwnerProcess();
+            KProcessPageTable* client_page_table = std::addressof(client_process->GetPageTable());
 
-            // // If we need to, reply with an async error.
-            // if (R_FAILED(client_result)) {
-            //     ReplyAsyncError(client_process, client_message, client_buffer_size,
-            //     client_result);
-            // }
+            // If we need to, reply with an async error.
+            if (R_FAILED(client_result)) {
+                ReplyAsyncError(client_process, client_message, client_buffer_size, client_result);
+            }
 
-            // // Unlock the client buffer.
-            // // NOTE: Nintendo does not check the result of this.
-            // client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size);
+            // Unlock the client buffer.
+            // NOTE: Nintendo does not check the result of this.
+            client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size);
 
             // Signal the event.
             event->Signal();
@@ -410,91 +1319,53 @@ Result KServerSession::SendReply(bool is_hle) {
     R_RETURN(result);
 }
 
-Result KServerSession::ReceiveRequest(std::shared_ptr<Service::HLERequestContext>* out_context,
-                                      std::weak_ptr<Service::SessionRequestManager> manager) {
-    // Lock the session.
-    KScopedLightLock lk{m_lock};
-
-    // Get the request and client thread.
-    KSessionRequest* request;
-    KThread* client_thread;
+Result KServerSession::OnRequest(KSessionRequest* request) {
+    // Enqueue the request; a synchronous caller then parks on this queue until it completes.
+    ThreadQueueImplForKServerSessionRequest wait_queue{m_kernel};
 
     {
+        // Hold the scheduler lock across the checks, the enqueue and the wait setup below.
         KScopedSchedulerLock sl{m_kernel};
 
-        // Ensure that we can service the request.
-        R_UNLESS(!m_parent->IsClientClosed(), ResultSessionClosed);
-
-        // Ensure we aren't already servicing a request.
-        R_UNLESS(m_current_request == nullptr, ResultNotFound);
+        // Refuse new requests once the server side of the session has been closed.
+        R_UNLESS(!m_parent->IsServerClosed(), ResultSessionClosed);
 
-        // Ensure we have a request to service.
-        R_UNLESS(!m_request_list.empty(), ResultNotFound);
+        // Refuse new requests from a calling thread that has termination requested.
+        R_UNLESS(!GetCurrentThread(m_kernel).IsTerminationRequested(), ResultTerminationRequested);
 
-        // Pop the first request from the list.
-        request = std::addressof(m_request_list.front());
-        m_request_list.pop_front();
+        // Remember whether the queue was empty before this request is appended.
+        const bool was_empty = m_request_list.empty();
 
-        // Get the thread for the request.
-        client_thread = request->GetThread();
-        R_UNLESS(client_thread != nullptr, ResultSessionClosed);
+        // Open a reference on the request, then append it to the queue.
+        request->Open();
+        m_request_list.push_back(*request);
 
-        // Open the client thread.
-        client_thread->Open();
-    }
+        // Signal availability only when the queue went from empty to non-empty.
+        if (was_empty) {
+            this->NotifyAvailable();
+        }
 
-    SCOPE_EXIT({ client_thread->Close(); });
+        // A request carrying an event is asynchronous: return success without waiting.
+        R_SUCCEED_IF(request->GetEvent() != nullptr);
 
-    // Set the request as our current.
-    m_current_request = request;
+        // Otherwise the request is synchronous: put the calling thread into an IPC wait.
+        GetCurrentThread(m_kernel).SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::IPC);
+        GetCurrentThread(m_kernel).BeginWait(std::addressof(wait_queue));
+    }
 
-    // Get the client address.
-    uintptr_t client_message = request->GetAddress();
-    size_t client_buffer_size = request->GetSize();
-    // bool recv_list_broken = false;
+    return GetCurrentThread(m_kernel).GetWaitResult();
+}
 
-    if (!client_message) {
-        client_message = GetInteger(client_thread->GetTlsAddress());
-        client_buffer_size = MessageBufferSize;
-    }
+bool KServerSession::IsSignaled() const {
+    ASSERT(KScheduler::IsSchedulerLockedByCurrentThread(m_kernel));
 
-    // Receive the message.
-    Core::Memory::Memory& memory{client_thread->GetOwnerProcess()->GetMemory()};
-    if (out_context != nullptr) {
-        // HLE request.
-        u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(client_message))};
-        *out_context =
-            std::make_shared<Service::HLERequestContext>(m_kernel, memory, this, client_thread);
-        (*out_context)->SetSessionRequestManager(manager);
-        (*out_context)
-            ->PopulateFromIncomingCommandBuffer(*client_thread->GetOwnerProcess(), cmd_buf);
-    } else {
-        KThread* server_thread = GetCurrentThreadPointer(m_kernel);
-        KProcess& src_process = *client_thread->GetOwnerProcess();
-        KProcess& dst_process = *server_thread->GetOwnerProcess();
-        UNIMPLEMENTED_IF(client_thread->GetOwnerProcess() != server_thread->GetOwnerProcess());
-
-        auto* src_msg_buffer = memory.GetPointer<u32>(client_message);
-        auto* dst_msg_buffer = memory.GetPointer<u32>(server_thread->GetTlsAddress());
-        std::memcpy(dst_msg_buffer, src_msg_buffer, client_buffer_size);
-
-        // Translate special header ad-hoc.
-        // TODO: fix this mess
-        MessageBuffer src_msg(src_msg_buffer, client_buffer_size);
-        MessageBuffer::MessageHeader src_header(src_msg);
-        MessageBuffer::SpecialHeader src_special_header(src_msg, src_header);
-        if (src_header.GetHasSpecialHeader()) {
-            MessageBuffer dst_msg(dst_msg_buffer, client_buffer_size);
-            Result res = ProcessMessageSpecialData<false>(dst_process, src_process, *client_thread,
-                                                          dst_msg, src_msg, src_special_header);
-            if (R_FAILED(res)) {
-                CleanupSpecialData(dst_process, dst_msg_buffer, client_buffer_size);
-            }
-        }
+    // A closed client end always reads as signaled, whatever the state of the request queue.
+    if (m_parent->IsClientClosed()) {
+        return true;
     }
 
-    // We succeeded.
-    R_SUCCEED();
+    // Otherwise, signaled only while a request is queued and none is currently being handled.
+    return !m_request_list.empty() && m_current_request == nullptr;
 }
 
 void KServerSession::CleanupRequests() {
@@ -527,31 +1398,30 @@ void KServerSession::CleanupRequests() {
         SCOPE_EXIT({ request->Close(); });
 
         // Extract relevant information from the request.
-        // const uintptr_t client_message  = request->GetAddress();
-        // const size_t client_buffer_size = request->GetSize();
+        const uint64_t client_message = request->GetAddress();
+        const size_t client_buffer_size = request->GetSize();
         KThread* client_thread = request->GetThread();
         KEvent* event = request->GetEvent();
 
-        // KProcess *server_process             = request->GetServerProcess();
-        // KProcess *client_process             = (client_thread != nullptr) ?
-        //                                         client_thread->GetOwnerProcess() : nullptr;
-        // KProcessPageTable *client_page_table = (client_process != nullptr) ?
-        //                                         std::addressof(client_process->GetPageTable())
-        //                                         : nullptr;
+        KProcess* server_process = request->GetServerProcess();
+        KProcess* client_process =
+            (client_thread != nullptr) ? client_thread->GetOwnerProcess() : nullptr;
+        KProcessPageTable* client_page_table =
+            (client_process != nullptr) ? std::addressof(client_process->GetPageTable()) : nullptr;
 
         // Cleanup the mappings.
-        // Result result = CleanupMap(request, server_process, client_page_table);
+        Result result = CleanupMap(request, server_process, client_page_table);
 
         // If there's a client thread, update it.
         if (client_thread != nullptr) {
             if (event != nullptr) {
-                // // We need to reply async.
-                // ReplyAsyncError(client_process, client_message, client_buffer_size,
-                //                 (R_SUCCEEDED(result) ? ResultSessionClosed : result));
+                // We need to reply async.
+                ReplyAsyncError(client_process, client_message, client_buffer_size,
+                                (R_SUCCEEDED(result) ? ResultSessionClosed : result));
 
-                // // Unlock the client buffer.
+                // Unlock the client buffer.
                 // NOTE: Nintendo does not check the result of this.
-                // client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size);
+                client_page_table->UnlockForIpcUserBuffer(client_message, client_buffer_size);
 
                 // Signal the event.
                 event->Signal();
@@ -567,4 +1437,97 @@ void KServerSession::CleanupRequests() {
     }
 }
 
+void KServerSession::OnClientClosed() {
+    KScopedLightLock lk{m_lock};
+
+    // Handle any pending requests: the in-flight one (visited once), then everything queued.
+    KSessionRequest* prev_request = nullptr;
+    while (true) {
+        // Per-iteration state describing the request picked below.
+        KSessionRequest* request = nullptr;
+        KEvent* event = nullptr;
+        KThread* thread = nullptr;
+        bool cur_request = false;
+        bool terminate = false;
+
+        // Pick the next request while holding the scheduler lock.
+        {
+            KScopedSchedulerLock sl{m_kernel};
+
+            if (m_current_request != nullptr && m_current_request != prev_request) {
+                // Set the request, open a reference as we process it.
+                request = m_current_request;
+                request->Open();
+                cur_request = true;
+
+                // Get thread and event for the request.
+                thread = request->GetThread();
+                event = request->GetEvent();
+
+                // A terminating thread and its event are detached here and closed further down.
+                if (thread->IsTerminationRequested()) {
+                    request->ClearThread();
+                    request->ClearEvent();
+                    terminate = true;
+                }
+
+                prev_request = request; // Prevents the in-flight request from being picked again.
+            } else if (!m_request_list.empty()) {
+                // Pop the request from the front of the list.
+                request = std::addressof(m_request_list.front());
+                m_request_list.pop_front();
+
+                // Get thread and event for the request.
+                thread = request->GetThread();
+                event = request->GetEvent();
+            }
+        }
+
+        // If there are no requests, we're done.
+        if (request == nullptr) {
+            break;
+        }
+
+        // All requests must have threads.
+        ASSERT(thread != nullptr);
+
+        // Ensure that we close the request when done.
+        SCOPE_EXIT({ request->Close(); });
+
+        // If we're terminating, close a reference to the thread and event.
+        if (terminate) {
+            thread->Close();
+            if (event != nullptr) {
+                event->Close();
+            }
+        }
+
+        // Only queued (not in-flight) requests that carry an event are answered here.
+        if (event != nullptr && !cur_request) {
+            // There must be no mappings.
+            ASSERT(request->GetSendCount() == 0);
+            ASSERT(request->GetReceiveCount() == 0);
+            ASSERT(request->GetExchangeCount() == 0);
+
+            // Get the process and page table.
+            KProcess* client_process = thread->GetOwnerProcess();
+            auto& client_pt = client_process->GetPageTable();
+
+            // Reply to the request with ResultSessionClosed.
+            ReplyAsyncError(client_process, request->GetAddress(), request->GetSize(),
+                            ResultSessionClosed);
+
+            // Unlock the buffer.
+            // NOTE: Nintendo does not check the result of this.
+            client_pt.UnlockForIpcUserBuffer(request->GetAddress(), request->GetSize());
+
+            // Signal the event.
+            event->Signal();
+        }
+    }
+
+    // Notify availability, passing ResultSessionClosed.
+    this->NotifyAvailable(ResultSessionClosed);
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h
index 403891919..2876c231b 100644
--- a/src/core/hle/kernel/k_server_session.h
+++ b/src/core/hle/kernel/k_server_session.h
@@ -49,14 +49,21 @@ public:
     bool IsSignaled() const override;
     void OnClientClosed();
 
-    /// TODO: flesh these out to match the real kernel
     Result OnRequest(KSessionRequest* request);
-    Result SendReply(bool is_hle = false);
-    Result ReceiveRequest(std::shared_ptr<Service::HLERequestContext>* out_context = nullptr,
+    Result SendReply(uintptr_t server_message, uintptr_t server_buffer_size,
+                     KPhysicalAddress server_message_paddr, bool is_hle = false);
+    Result ReceiveRequest(uintptr_t server_message, uintptr_t server_buffer_size,
+                          KPhysicalAddress server_message_paddr,
+                          std::shared_ptr<Service::HLERequestContext>* out_context = nullptr,
                           std::weak_ptr<Service::SessionRequestManager> manager = {});
 
     Result SendReplyHLE() {
-        return SendReply(true);
+        R_RETURN(this->SendReply(0, 0, 0, true)); // HLE: zero message address/size/paddr.
+    }
+
+    Result ReceiveRequestHLE(std::shared_ptr<Service::HLERequestContext>* out_context,
+                             std::weak_ptr<Service::SessionRequestManager> manager) {
+        R_RETURN(this->ReceiveRequest(0, 0, 0, out_context, manager)); // HLE: zero message args.
     }
 
 private:
diff --git a/src/core/hle/kernel/k_session.cpp b/src/core/hle/kernel/k_session.cpp
index 44d7a8f02..4a1f6027e 100644
--- a/src/core/hle/kernel/k_session.cpp
+++ b/src/core/hle/kernel/k_session.cpp
@@ -33,8 +33,7 @@ void KSession::Initialize(KClientPort* client_port, uintptr_t name) {
     m_name = name;
 
     // Set our owner process.
-    //! FIXME: this is the wrong process!
-    m_process = m_kernel.ApplicationProcess();
+    m_process = GetCurrentProcessPointer(m_kernel);
     m_process->Open();
 
     // Set our port.
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index 7d9a6e9cf..24394d222 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -1422,8 +1422,7 @@ s32 GetCurrentCoreId(KernelCore& kernel) {
 }
 
 Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel) {
-    // TODO: per-process memory
-    return kernel.System().ApplicationMemory();
+    return GetCurrentProcess(kernel).GetMemory();
 }
 
 KScopedDisableDispatch::~KScopedDisableDispatch() {
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index e9925d231..f13e232b2 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -314,11 +314,7 @@ public:
         m_current_core_id = core;
     }
 
-    KProcess* GetOwnerProcess() {
-        return m_parent;
-    }
-
-    const KProcess* GetOwnerProcess() const {
+    KProcess* GetOwnerProcess() const {
         return m_parent;
     }
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index e479dacde..c14d2d2f3 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -68,8 +68,6 @@ struct KernelCore::Impl {
 
         global_object_list_container = std::make_unique<KAutoObjectWithListContainer>(kernel);
         global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
-        global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel);
-        global_handle_table->Initialize(KHandleTable::MaxTableSize);
 
         is_phantom_mode_for_singlecore = false;
 
@@ -121,13 +119,8 @@ struct KernelCore::Impl {
         next_user_process_id = KProcess::ProcessIdMin;
         next_thread_id = 1;
 
-        global_handle_table->Finalize();
-        global_handle_table.reset();
-
         preemption_event = nullptr;
 
-        exclusive_monitor.reset();
-
         // Cleanup persistent kernel objects
         auto CleanupObject = [](KAutoObject* obj) {
             if (obj) {
@@ -191,8 +184,6 @@ struct KernelCore::Impl {
     }
 
     void InitializePhysicalCores() {
-        exclusive_monitor =
-            Core::MakeExclusiveMonitor(system.ApplicationMemory(), Core::Hardware::NUM_CPU_CORES);
         for (u32 i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             const s32 core{static_cast<s32>(i)};
 
@@ -791,10 +782,6 @@ struct KernelCore::Impl {
 
     std::shared_ptr<Core::Timing::EventType> preemption_event;
 
-    // This is the kernel's handle table or supervisor handle table which
-    // stores all the objects in place.
-    std::unique_ptr<KHandleTable> global_handle_table;
-
     std::unique_ptr<KAutoObjectWithListContainer> global_object_list_container;
 
     std::unique_ptr<KObjectNameGlobalData> object_name_global_data;
@@ -805,7 +792,6 @@ struct KernelCore::Impl {
     std::mutex server_lock;
     std::vector<std::unique_ptr<Service::ServerManager>> server_managers;
 
-    std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
     std::array<std::unique_ptr<Kernel::PhysicalCore>, Core::Hardware::NUM_CPU_CORES> cores;
 
     // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others
@@ -882,10 +868,6 @@ KResourceLimit* KernelCore::GetSystemResourceLimit() {
     return impl->system_resource_limit;
 }
 
-KScopedAutoObject<KThread> KernelCore::RetrieveThreadFromGlobalHandleTable(Handle handle) const {
-    return impl->global_handle_table->GetObject<KThread>(handle);
-}
-
 void KernelCore::AppendNewProcess(KProcess* process) {
     impl->process_list.push_back(process);
 }
@@ -959,14 +941,6 @@ Kernel::KHardwareTimer& KernelCore::HardwareTimer() {
     return *impl->hardware_timer;
 }
 
-Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() {
-    return *impl->exclusive_monitor;
-}
-
-const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
-    return *impl->exclusive_monitor;
-}
-
 KAutoObjectWithListContainer& KernelCore::ObjectListContainer() {
     return *impl->global_object_list_container;
 }
@@ -1030,14 +1004,6 @@ u64 KernelCore::CreateNewUserProcessID() {
     return impl->next_user_process_id++;
 }
 
-KHandleTable& KernelCore::GlobalHandleTable() {
-    return *impl->global_handle_table;
-}
-
-const KHandleTable& KernelCore::GlobalHandleTable() const {
-    return *impl->global_handle_table;
-}
-
 void KernelCore::RegisterCoreThread(std::size_t core_id) {
     impl->RegisterCoreThread(core_id);
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 78c88902c..5d4102145 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -116,9 +116,6 @@ public:
     /// Retrieves a shared pointer to the system resource limit instance.
     KResourceLimit* GetSystemResourceLimit();
 
-    /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table.
-    KScopedAutoObject<KThread> RetrieveThreadFromGlobalHandleTable(Handle handle) const;
-
     /// Adds the given shared pointer to an internal list of active processes.
     void AppendNewProcess(KProcess* process);
 
@@ -170,10 +167,6 @@ public:
     /// Stops execution of 'id' core, in order to reschedule a new thread.
     void PrepareReschedule(std::size_t id);
 
-    Core::ExclusiveMonitor& GetExclusiveMonitor();
-
-    const Core::ExclusiveMonitor& GetExclusiveMonitor() const;
-
     KAutoObjectWithListContainer& ObjectListContainer();
 
     const KAutoObjectWithListContainer& ObjectListContainer() const;
diff --git a/src/core/hle/kernel/message_buffer.h b/src/core/hle/kernel/message_buffer.h
index 75b275310..d528a9bb3 100644
--- a/src/core/hle/kernel/message_buffer.h
+++ b/src/core/hle/kernel/message_buffer.h
@@ -18,13 +18,13 @@ public:
         static constexpr inline u64 NullTag = 0;
 
     public:
-        enum class ReceiveListCountType : u32 {
-            None = 0,
-            ToMessageBuffer = 1,
-            ToSingleBuffer = 2,
+        enum ReceiveListCountType : u32 {
+            ReceiveListCountType_None = 0,
+            ReceiveListCountType_ToMessageBuffer = 1,
+            ReceiveListCountType_ToSingleBuffer = 2,
 
-            CountOffset = 2,
-            CountMax = 13,
+            ReceiveListCountType_CountOffset = 2,
+            ReceiveListCountType_CountMax = 13,
         };
 
     private:
@@ -591,16 +591,16 @@ public:
         // Add the size of the receive list.
         const auto count = hdr.GetReceiveListCount();
         switch (count) {
-        case MessageHeader::ReceiveListCountType::None:
+        case MessageHeader::ReceiveListCountType_None:
             break;
-        case MessageHeader::ReceiveListCountType::ToMessageBuffer:
+        case MessageHeader::ReceiveListCountType_ToMessageBuffer:
             break;
-        case MessageHeader::ReceiveListCountType::ToSingleBuffer:
+        case MessageHeader::ReceiveListCountType_ToSingleBuffer:
             msg_size += ReceiveListEntry::GetDataSize();
             break;
         default:
             msg_size += (static_cast<s32>(count) -
-                         static_cast<s32>(MessageHeader::ReceiveListCountType::CountOffset)) *
+                         static_cast<s32>(MessageHeader::ReceiveListCountType_CountOffset)) *
                         ReceiveListEntry::GetDataSize();
             break;
         }
diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp
index ada998772..231e4d0e1 100644
--- a/src/core/hle/kernel/svc/svc_info.cpp
+++ b/src/core/hle/kernel/svc/svc_info.cpp
@@ -118,7 +118,6 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle
             R_SUCCEED();
 
         case InfoType::IsApplication:
-            LOG_WARNING(Kernel_SVC, "(STUBBED) Assuming process is application");
             *result = process->IsApplication();
             R_SUCCEED();
 
diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp
index 47a3e7bb0..85cc4f561 100644
--- a/src/core/hle/kernel/svc/svc_ipc.cpp
+++ b/src/core/hle/kernel/svc/svc_ipc.cpp
@@ -48,8 +48,7 @@ Result ReplyAndReceiveImpl(KernelCore& kernel, int32_t* out_index, uintptr_t mes
         };
 
         // Send the reply.
-        R_TRY(session->SendReply());
-        // R_TRY(session->SendReply(message, buffer_size, message_paddr));
+        R_TRY(session->SendReply(message, buffer_size, message_paddr));
     }
 
     // Receive a message.
@@ -85,8 +84,7 @@ Result ReplyAndReceiveImpl(KernelCore& kernel, int32_t* out_index, uintptr_t mes
             if (R_SUCCEEDED(result)) {
                 KServerSession* session = objs[index]->DynamicCast<KServerSession*>();
                 if (session != nullptr) {
-                    // result = session->ReceiveRequest(message, buffer_size, message_paddr);
-                    result = session->ReceiveRequest();
+                    result = session->ReceiveRequest(message, buffer_size, message_paddr);
                     if (ResultNotFound == result) {
                         continue;
                     }
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h
index e1ad78607..38e71d516 100644
--- a/src/core/hle/kernel/svc_results.h
+++ b/src/core/hle/kernel/svc_results.h
@@ -38,7 +38,9 @@ constexpr Result ResultInvalidState{ErrorModule::Kernel, 125};
 constexpr Result ResultReservedUsed{ErrorModule::Kernel, 126};
 constexpr Result ResultPortClosed{ErrorModule::Kernel, 131};
 constexpr Result ResultLimitReached{ErrorModule::Kernel, 132};
+constexpr Result ResultReceiveListBroken{ErrorModule::Kernel, 258};
 constexpr Result ResultOutOfAddressSpace{ErrorModule::Kernel, 259};
+constexpr Result ResultMessageTooLarge{ErrorModule::Kernel, 260};
 constexpr Result ResultInvalidId{ErrorModule::Kernel, 519};
 
 } // namespace Kernel
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index fe2ed8df8..31da86074 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -89,7 +89,7 @@ static void GenerateErrorReport(Core::System& system, Result error_code, const F
         crash_report += fmt::format("    ESR:                         {:016x}\n", info.esr);
         crash_report += fmt::format("    FAR:                         {:016x}\n", info.far);
         crash_report += "\nBacktrace:\n";
-        for (size_t i = 0; i < info.backtrace_size; i++) {
+        for (u32 i = 0; i < std::min<u32>(info.backtrace_size, 32); i++) {
             crash_report +=
                 fmt::format("    Backtrace[{:02d}]:               {:016x}\n", i, info.backtrace[i]);
         }
diff --git a/src/core/hle/service/hid/hid_server.cpp b/src/core/hle/service/hid/hid_server.cpp
index de24b0401..06a01c02c 100644
--- a/src/core/hle/service/hid/hid_server.cpp
+++ b/src/core/hle/service/hid/hid_server.cpp
@@ -51,7 +51,7 @@ private:
         IPC::RequestParser rp{ctx};
         const auto vibration_device_handle{rp.PopRaw<Core::HID::VibrationDeviceHandle>()};
 
-        if (resource_manager != nullptr) {
+        if (resource_manager != nullptr && resource_manager->GetNpad()) {
             resource_manager->GetNpad()->InitializeVibrationDevice(vibration_device_handle);
         }
 
diff --git a/src/core/hle/service/ipc_helpers.h b/src/core/hle/service/ipc_helpers.h
index 0e222362e..4b02872fb 100644
--- a/src/core/hle/service/ipc_helpers.h
+++ b/src/core/hle/service/ipc_helpers.h
@@ -151,8 +151,8 @@ public:
         if (manager->IsDomain()) {
             context->AddDomainObject(std::move(iface));
         } else {
-            kernel.ApplicationProcess()->GetResourceLimit()->Reserve(
-                Kernel::LimitableResource::SessionCountMax, 1);
+            ASSERT(Kernel::GetCurrentProcess(kernel).GetResourceLimit()->Reserve(
+                Kernel::LimitableResource::SessionCountMax, 1));
 
             auto* session = Kernel::KSession::Create(kernel);
             session->Initialize(nullptr, 0);
diff --git a/src/core/hle/service/server_manager.cpp b/src/core/hle/service/server_manager.cpp
index 6808247a9..15edb23e0 100644
--- a/src/core/hle/service/server_manager.cpp
+++ b/src/core/hle/service/server_manager.cpp
@@ -47,7 +47,7 @@ ServerManager::~ServerManager() {
     m_stopped.Wait();
     m_threads.clear();
 
-    // Clean up ports.
+    // Clean up server ports.
     for (const auto& [port, handler] : m_ports) {
         port->Close();
     }
@@ -97,22 +97,15 @@ Result ServerManager::RegisterNamedService(const std::string& service_name,
                                            u32 max_sessions) {
     ASSERT(m_sessions.size() + m_ports.size() < MaximumWaitObjects);
 
-    // Add the new server to sm:.
-    ASSERT(R_SUCCEEDED(
-        m_system.ServiceManager().RegisterService(service_name, max_sessions, handler_factory)));
-
-    // Get the registered port.
-    Kernel::KPort* port{};
-    ASSERT(
-        R_SUCCEEDED(m_system.ServiceManager().GetServicePort(std::addressof(port), service_name)));
-
-    // Open a new reference to the server port.
-    port->GetServerPort().Open();
+    // Add the new server to sm: and get the moved server port.
+    Kernel::KServerPort* server_port{};
+    R_ASSERT(m_system.ServiceManager().RegisterService(std::addressof(server_port), service_name,
+                                                       max_sessions, handler_factory));
 
     // Begin tracking the server port.
     {
         std::scoped_lock ll{m_list_mutex};
-        m_ports.emplace(std::addressof(port->GetServerPort()), std::move(handler_factory));
+        m_ports.emplace(server_port, std::move(handler_factory));
     }
 
     // Signal the wakeup event.
@@ -372,7 +365,7 @@ Result ServerManager::OnSessionEvent(Kernel::KServerSession* session,
 
     // Try to receive a message.
     std::shared_ptr<HLERequestContext> context;
-    rc = session->ReceiveRequest(&context, manager);
+    rc = session->ReceiveRequestHLE(&context, manager);
 
     // If the session has been closed, we're done.
     if (rc == Kernel::ResultSessionClosed) {
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index 0653779d5..8e637f963 100644
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -507,6 +507,14 @@ void SET_SYS::SetTvSettings(HLERequestContext& ctx) {
     rb.Push(ResultSuccess);
 }
 
+void SET_SYS::GetDebugModeFlag(HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(ResultSuccess);
+    rb.Push<u32>(0);
+}
+
 void SET_SYS::GetQuestFlag(HLERequestContext& ctx) {
     LOG_WARNING(Service_SET, "(STUBBED) called");
 
@@ -926,7 +934,7 @@ SET_SYS::SET_SYS(Core::System& system_) : ServiceFramework{system_, "set:sys"},
         {59, &SET_SYS::SetNetworkSystemClockContext, "SetNetworkSystemClockContext"},
         {60, &SET_SYS::IsUserSystemClockAutomaticCorrectionEnabled, "IsUserSystemClockAutomaticCorrectionEnabled"},
         {61, &SET_SYS::SetUserSystemClockAutomaticCorrectionEnabled, "SetUserSystemClockAutomaticCorrectionEnabled"},
-        {62, nullptr, "GetDebugModeFlag"},
+        {62, &SET_SYS::GetDebugModeFlag, "GetDebugModeFlag"},
         {63, &SET_SYS::GetPrimaryAlbumStorage, "GetPrimaryAlbumStorage"},
         {64, nullptr, "SetPrimaryAlbumStorage"},
         {65, nullptr, "GetUsb30EnableFlag"},
@@ -1143,6 +1151,8 @@ void SET_SYS::StoreSettings() {
 }
 
 void SET_SYS::StoreSettingsThreadFunc(std::stop_token stop_token) {
+    Common::SetCurrentThreadName("SettingsStore");
+
     while (Common::StoppableTimedWait(stop_token, std::chrono::minutes(1))) {
         std::scoped_lock l{m_save_needed_mutex};
         if (!std::exchange(m_save_needed, false)) {
diff --git a/src/core/hle/service/set/set_sys.h b/src/core/hle/service/set/set_sys.h
index 3785d93d8..853f76fce 100644
--- a/src/core/hle/service/set/set_sys.h
+++ b/src/core/hle/service/set/set_sys.h
@@ -98,6 +98,7 @@ private:
     void GetSettingsItemValue(HLERequestContext& ctx);
     void GetTvSettings(HLERequestContext& ctx);
     void SetTvSettings(HLERequestContext& ctx);
+    void GetDebugModeFlag(HLERequestContext& ctx);
     void GetQuestFlag(HLERequestContext& ctx);
     void GetDeviceTimeZoneLocationName(HLERequestContext& ctx);
     void SetDeviceTimeZoneLocationName(HLERequestContext& ctx);
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 296ee6e89..1095dcf6c 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -29,8 +29,7 @@ ServiceManager::ServiceManager(Kernel::KernelCore& kernel_) : kernel{kernel_} {
 
 ServiceManager::~ServiceManager() {
     for (auto& [name, port] : service_ports) {
-        port->GetClientPort().Close();
-        port->GetServerPort().Close();
+        port->Close();
     }
 
     if (deferral_event) {
@@ -50,8 +49,8 @@ static Result ValidateServiceName(const std::string& name) {
     return ResultSuccess;
 }
 
-Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
-                                       SessionRequestHandlerFactory handler) {
+Result ServiceManager::RegisterService(Kernel::KServerPort** out_server_port, std::string name,
+                                       u32 max_sessions, SessionRequestHandlerFactory handler) {
     R_TRY(ValidateServiceName(name));
 
     std::scoped_lock lk{lock};
@@ -66,13 +65,17 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
     // Register the port.
     Kernel::KPort::Register(kernel, port);
 
-    service_ports.emplace(name, port);
+    service_ports.emplace(name, std::addressof(port->GetClientPort()));
     registered_services.emplace(name, handler);
     if (deferral_event) {
         deferral_event->Signal();
     }
 
-    return ResultSuccess;
+    // Set our output.
+    *out_server_port = std::addressof(port->GetServerPort());
+
+    // We succeeded.
+    R_SUCCEED();
 }
 
 Result ServiceManager::UnregisterService(const std::string& name) {
@@ -91,7 +94,8 @@ Result ServiceManager::UnregisterService(const std::string& name) {
     return ResultSuccess;
 }
 
-Result ServiceManager::GetServicePort(Kernel::KPort** out_port, const std::string& name) {
+Result ServiceManager::GetServicePort(Kernel::KClientPort** out_client_port,
+                                      const std::string& name) {
     R_TRY(ValidateServiceName(name));
 
     std::scoped_lock lk{lock};
@@ -101,7 +105,7 @@ Result ServiceManager::GetServicePort(Kernel::KPort** out_port, const std::strin
         return Service::SM::ResultNotRegistered;
     }
 
-    *out_port = it->second;
+    *out_client_port = it->second;
     return ResultSuccess;
 }
 
@@ -172,8 +176,8 @@ Result SM::GetServiceImpl(Kernel::KClientSession** out_client_session, HLEReques
     std::string name(PopServiceName(rp));
 
     // Find the named port.
-    Kernel::KPort* port{};
-    auto port_result = service_manager.GetServicePort(&port, name);
+    Kernel::KClientPort* client_port{};
+    auto port_result = service_manager.GetServicePort(&client_port, name);
     if (port_result == Service::SM::ResultInvalidServiceName) {
         LOG_ERROR(Service_SM, "Invalid service name '{}'", name);
         return Service::SM::ResultInvalidServiceName;
@@ -187,7 +191,7 @@ Result SM::GetServiceImpl(Kernel::KClientSession** out_client_session, HLEReques
 
     // Create a new session.
     Kernel::KClientSession* session{};
-    if (const auto result = port->GetClientPort().CreateSession(&session); result.IsError()) {
+    if (const auto result = client_port->CreateSession(&session); result.IsError()) {
         LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, result.raw);
         return result;
     }
@@ -221,7 +225,9 @@ void SM::RegisterServiceImpl(HLERequestContext& ctx, std::string name, u32 max_s
     LOG_DEBUG(Service_SM, "called with name={}, max_session_count={}, is_light={}", name,
               max_session_count, is_light);
 
-    if (const auto result = service_manager.RegisterService(name, max_session_count, nullptr);
+    Kernel::KServerPort* server_port{};
+    if (const auto result = service_manager.RegisterService(std::addressof(server_port), name,
+                                                            max_session_count, nullptr);
         result.IsError()) {
         LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}", result.raw);
         IPC::ResponseBuilder rb{ctx, 2};
@@ -229,13 +235,9 @@ void SM::RegisterServiceImpl(HLERequestContext& ctx, std::string name, u32 max_s
         return;
     }
 
-    auto* port = Kernel::KPort::Create(kernel);
-    port->Initialize(ServerSessionCountMax, is_light, 0);
-    SCOPE_EXIT({ port->GetClientPort().Close(); });
-
     IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
     rb.Push(ResultSuccess);
-    rb.PushMoveObjects(port->GetServerPort());
+    rb.PushMoveObjects(server_port);
 }
 
 void SM::UnregisterService(HLERequestContext& ctx) {
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index ff74f588a..4ae32a9c1 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -56,10 +56,10 @@ public:
     explicit ServiceManager(Kernel::KernelCore& kernel_);
     ~ServiceManager();
 
-    Result RegisterService(std::string name, u32 max_sessions,
-                           SessionRequestHandlerFactory handler_factory);
+    Result RegisterService(Kernel::KServerPort** out_server_port, std::string name,
+                           u32 max_sessions, SessionRequestHandlerFactory handler_factory);
     Result UnregisterService(const std::string& name);
-    Result GetServicePort(Kernel::KPort** out_port, const std::string& name);
+    Result GetServicePort(Kernel::KClientPort** out_client_port, const std::string& name);
 
     template <Common::DerivedFrom<SessionRequestHandler> T>
     std::shared_ptr<T> GetService(const std::string& service_name) const {
@@ -84,7 +84,7 @@ private:
     /// Map of registered services, retrieved using GetServicePort.
     std::mutex lock;
     std::unordered_map<std::string, SessionRequestHandlerFactory> registered_services;
-    std::unordered_map<std::string, Kernel::KPort*> service_ports;
+    std::unordered_map<std::string, Kernel::KClientPort*> service_ports;
 
     /// Kernel context
     Kernel::KernelCore& kernel;
diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp
index 7dce28fe0..7f0fb91d0 100644
--- a/src/core/hle/service/sm/sm_controller.cpp
+++ b/src/core/hle/service/sm/sm_controller.cpp
@@ -28,7 +28,6 @@ void Controller::ConvertCurrentObjectToDomain(HLERequestContext& ctx) {
 void Controller::CloneCurrentObject(HLERequestContext& ctx) {
     LOG_DEBUG(Service, "called");
 
-    auto& process = *ctx.GetThread().GetOwnerProcess();
     auto session_manager = ctx.GetManager();
 
     // FIXME: this is duplicated from the SVC, it should just call it instead
@@ -36,11 +35,11 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
 
     // Reserve a new session from the process resource limit.
     Kernel::KScopedResourceReservation session_reservation(
-        &process, Kernel::LimitableResource::SessionCountMax);
+        Kernel::GetCurrentProcessPointer(kernel), Kernel::LimitableResource::SessionCountMax);
     ASSERT(session_reservation.Succeeded());
 
     // Create the session.
-    Kernel::KSession* session = Kernel::KSession::Create(system.Kernel());
+    Kernel::KSession* session = Kernel::KSession::Create(kernel);
     ASSERT(session != nullptr);
 
     // Initialize the session.
@@ -50,7 +49,7 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
     session_reservation.Commit();
 
     // Register the session.
-    Kernel::KSession::Register(system.Kernel(), session);
+    Kernel::KSession::Register(kernel, session);
 
     // Register with server manager.
     session_manager->GetServerManager().RegisterSession(&session->GetServerSession(),
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index 60ee78e89..c9f8707b7 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -129,9 +129,10 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
     }
     metadata.Print();
 
-    // Enable NCE only for programs with 39-bit address space.
+    // Enable NCE only for applications with 39-bit address space.
     const bool is_39bit =
         metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit;
+    const bool is_application = metadata.GetPoolPartition() == FileSys::PoolPartition::Application;
     Settings::SetNceEnabled(is_39bit);
 
     const std::array static_modules = {"rtld",    "main",    "subsdk0", "subsdk1", "subsdk2",
@@ -147,7 +148,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
 
     const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* {
 #ifdef HAS_NCE
-        if (Settings::IsNceEnabled()) {
+        if (is_application && Settings::IsNceEnabled()) {
             return &module_patchers[i];
         }
 #endif
@@ -175,7 +176,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
 
     // Enable direct memory mapping in case of NCE.
     const u64 fastmem_base = [&]() -> size_t {
-        if (Settings::IsNceEnabled()) {
+        if (is_application && Settings::IsNceEnabled()) {
             auto& buffer = system.DeviceMemory().buffer;
             buffer.EnableDirectMappedAddress();
             return reinterpret_cast<u64>(buffer.VirtualBasePointer());
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 169bf4c8c..c7eb32c19 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -45,7 +45,13 @@ struct Memory::Impl {
 
     void SetCurrentPageTable(Kernel::KProcess& process) {
         current_page_table = &process.GetPageTable().GetImpl();
-        current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
+
+        if (std::addressof(process) == system.ApplicationProcess() &&
+            Settings::IsFastmemEnabled()) {
+            current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
+        } else {
+            current_page_table->fastmem_arena = nullptr;
+        }
     }
 
     void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
@@ -57,7 +63,7 @@ struct Memory::Impl {
         MapPages(page_table, base / YUZU_PAGESIZE, size / YUZU_PAGESIZE, target,
                  Common::PageType::Memory);
 
-        if (Settings::IsFastmemEnabled()) {
+        if (current_page_table->fastmem_arena) {
             system.DeviceMemory().buffer.Map(GetInteger(base),
                                              GetInteger(target) - DramMemoryMap::Base, size, perms);
         }
@@ -69,7 +75,7 @@ struct Memory::Impl {
         MapPages(page_table, base / YUZU_PAGESIZE, size / YUZU_PAGESIZE, 0,
                  Common::PageType::Unmapped);
 
-        if (Settings::IsFastmemEnabled()) {
+        if (current_page_table->fastmem_arena) {
             system.DeviceMemory().buffer.Unmap(GetInteger(base), size);
         }
     }
@@ -79,7 +85,7 @@ struct Memory::Impl {
         ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size);
         ASSERT_MSG((vaddr & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", vaddr);
 
-        if (!Settings::IsFastmemEnabled()) {
+        if (!current_page_table->fastmem_arena) {
             return;
         }
 
@@ -88,11 +94,6 @@ struct Memory::Impl {
         const bool is_x =
             True(perms & Common::MemoryPermission::Execute) && Settings::IsNceEnabled();
 
-        if (!current_page_table) {
-            system.DeviceMemory().buffer.Protect(vaddr, size, is_r, is_w, is_x);
-            return;
-        }
-
         u64 protect_bytes{};
         u64 protect_begin{};
         for (u64 addr = vaddr; addr < vaddr + size; addr += YUZU_PAGESIZE) {
@@ -239,7 +240,7 @@ struct Memory::Impl {
 
     bool WalkBlock(const Common::ProcessAddress addr, const std::size_t size, auto on_unmapped,
                    auto on_memory, auto on_rasterizer, auto increment) {
-        const auto& page_table = system.ApplicationProcess()->GetPageTable().GetImpl();
+        const auto& page_table = *current_page_table;
         std::size_t remaining_size = size;
         std::size_t page_index = addr >> YUZU_PAGEBITS;
         std::size_t page_offset = addr & YUZU_PAGEMASK;
@@ -484,7 +485,7 @@ struct Memory::Impl {
             return;
         }
 
-        if (Settings::IsFastmemEnabled()) {
+        if (current_page_table->fastmem_arena) {
             system.DeviceMemory().buffer.Protect(vaddr, size, !debug, !debug);
         }
 
@@ -541,7 +542,7 @@ struct Memory::Impl {
             return;
         }
 
-        if (Settings::IsFastmemEnabled()) {
+        if (current_page_table->fastmem_arena) {
             const bool is_read_enable =
                 !Settings::values.use_reactive_flushing.GetValue() || !cached;
             system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
@@ -886,8 +887,7 @@ void Memory::ProtectRegion(Common::PageTable& page_table, Common::ProcessAddress
 }
 
 bool Memory::IsValidVirtualAddress(const Common::ProcessAddress vaddr) const {
-    const Kernel::KProcess& process = *system.ApplicationProcess();
-    const auto& page_table = process.GetPageTable().GetImpl();
+    const auto& page_table = *impl->current_page_table;
     const size_t page = vaddr >> YUZU_PAGEBITS;
     if (page >= page_table.pointers.size()) {
         return false;
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index e5a78a914..feca5105f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -74,6 +74,11 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
     case IR::Attribute::ClipDistance7: {
         const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
         const u32 index{static_cast<u32>(attr) - base};
+        if (index >= ctx.profile.max_user_clip_distances) {
+            LOG_WARNING(Shader, "Ignoring clip distance store {} >= {} supported", index,
+                        ctx.profile.max_user_clip_distances);
+            return std::nullopt;
+        }
         const Id clip_num{ctx.Const(index)};
         return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 22ceca19c..800754554 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -214,16 +214,16 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind
     }
 }
 
-Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+std::pair<Id, bool> Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
     if (!index.IsImmediate() || index.U32() != 0) {
         throw NotImplementedException("Indirect image indexing");
     }
     if (info.type == TextureType::Buffer) {
         const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)};
-        return ctx.OpLoad(def.image_type, def.id);
+        return {ctx.OpLoad(def.image_type, def.id), def.is_integer};
     } else {
         const ImageDefinition def{ctx.images.at(info.descriptor_index)};
-        return ctx.OpLoad(def.image_type, def.id);
+        return {ctx.OpLoad(def.image_type, def.id), def.is_integer};
     }
 }
 
@@ -566,13 +566,23 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
         LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
         return ctx.ConstantNull(ctx.U32[4]);
     }
-    return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4],
-                Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{});
+    const auto [image, is_integer] = Image(ctx, index, info);
+    const Id result_type{is_integer ? ctx.U32[4] : ctx.F32[4]};
+    Id color{Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst,
+                  result_type, image, coords, std::nullopt, std::span<const Id>{})};
+    if (!is_integer) {
+        color = ctx.OpBitcast(ctx.U32[4], color);
+    }
+    return color;
 }
 
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
-    ctx.OpImageWrite(Image(ctx, index, info), coords, color);
+    const auto [image, is_integer] = Image(ctx, index, info);
+    if (!is_integer) {
+        color = ctx.OpBitcast(ctx.F32[4], color);
+    }
+    ctx.OpImageWrite(image, coords, color);
 }
 
 Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 3350f1f85..ed023fcfe 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -74,20 +74,19 @@ spv::ImageFormat GetImageFormat(ImageFormat format) {
     throw InvalidArgument("Invalid image format {}", format);
 }
 
-Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) {
+Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) {
     const spv::ImageFormat format{GetImageFormat(desc.format)};
-    const Id type{ctx.U32[1]};
     switch (desc.type) {
     case TextureType::Color1D:
-        return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format);
+        return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format);
     case TextureType::ColorArray1D:
-        return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format);
+        return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format);
     case TextureType::Color2D:
-        return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format);
+        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format);
     case TextureType::ColorArray2D:
-        return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format);
+        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, 2, format);
     case TextureType::Color3D:
-        return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format);
+        return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, 2, format);
     case TextureType::Buffer:
         throw NotImplementedException("Image buffer");
     default:
@@ -1273,7 +1272,9 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
             throw NotImplementedException("Array of image buffers");
         }
         const spv::ImageFormat format{GetImageFormat(desc.format)};
-        const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)};
+        const Id sampled_type{desc.is_integer ? U32[1] : F32[1]};
+        const Id image_type{
+            TypeImage(sampled_type, spv::Dim::Buffer, false, false, false, 2, format)};
         const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
         const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
         Decorate(id, spv::Decoration::Binding, binding);
@@ -1283,6 +1284,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
             .id = id,
             .image_type = image_type,
             .count = desc.count,
+            .is_integer = desc.is_integer,
         });
         if (profile.supported_spirv >= 0x00010400) {
             interfaces.push_back(id);
@@ -1327,7 +1329,8 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde
         if (desc.count != 1) {
             throw NotImplementedException("Array of images");
         }
-        const Id image_type{ImageType(*this, desc)};
+        const Id sampled_type{desc.is_integer ? U32[1] : F32[1]};
+        const Id image_type{ImageType(*this, desc, sampled_type)};
         const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
         const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
         Decorate(id, spv::Decoration::Binding, binding);
@@ -1337,6 +1340,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde
             .id = id,
             .image_type = image_type,
             .count = desc.count,
+            .is_integer = desc.is_integer,
         });
         if (profile.supported_spirv >= 0x00010400) {
             interfaces.push_back(id);
@@ -1528,7 +1532,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
         if (stage == Stage::Fragment) {
             throw NotImplementedException("Storing ClipDistance in fragment stage");
         }
-        const Id type{TypeArray(F32[1], Const(8U))};
+        const Id type{TypeArray(
+            F32[1], Const(std::min(info.used_clip_distances, profile.max_user_clip_distances)))};
         clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance);
     }
     if (info.stores[IR::Attribute::Layer] &&
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 1aa79863d..56019ad89 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -47,12 +47,14 @@ struct ImageBufferDefinition {
     Id id;
     Id image_type;
     u32 count;
+    bool is_integer;
 };
 
 struct ImageDefinition {
     Id id;
     Id image_type;
     u32 count;
+    bool is_integer;
 };
 
 struct UniformDefinitions {
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 15285ab0a..e30bf094a 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -24,6 +24,8 @@ public:
 
     [[nodiscard]] virtual TexturePixelFormat ReadTexturePixelFormat(u32 raw_handle) = 0;
 
+    [[nodiscard]] virtual bool IsTexturePixelFormatInteger(u32 raw_handle) = 0;
+
     [[nodiscard]] virtual u32 ReadViewportTransformState() = 0;
 
     [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 70292686f..cb82a326c 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -913,7 +913,11 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
         }
         for (size_t index = 0; index < 8; ++index) {
             const u16 mask{header.vtg.omap_systemc.clip_distances};
-            info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
+            const bool used{((mask >> index) & 1) != 0};
+            info.stores.Set(IR::Attribute::ClipDistance0 + index, used);
+            if (used) {
+                info.used_clip_distances = static_cast<u32>(index) + 1;
+            }
         }
         info.stores.Set(IR::Attribute::PrimitiveId,
                         header.vtg.omap_systemb.primitive_array_id != 0);
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index ec12c843a..e4a73a360 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -815,6 +815,15 @@ bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coor
     return true;
 }
 
+void ConvertDerivatives(std::array<IR::Value, 3>& results, IR::IREmitter& ir) {
+    for (size_t i = 0; i < 3; i++) {
+        if (results[i].Type() == IR::Type::U32) {
+            results[i] = results[i].IsImmediate() ? ir.Imm32(Common::BitCast<f32>(results[i].U32()))
+                                                  : ir.BitCast<IR::F32>(IR::U32(results[i]));
+        }
+    }
+}
+
 void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>();
     auto orig_opcode = inst.GetOpcode();
@@ -831,12 +840,14 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     if (!offset.IsImmediate()) {
         return;
     }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     IR::Inst* const inst2 = coords.InstRecursive();
     std::array<std::array<IR::Value, 3>, 3> results_matrix;
     for (size_t i = 0; i < 3; i++) {
         if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
             return;
         }
+        ConvertDerivatives(results_matrix[i], ir);
     }
     IR::F32 lod_clamp{};
     if (info.has_lod_clamp != 0) {
@@ -846,7 +857,6 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
             lod_clamp = IR::F32{bias_lc};
         }
     }
-    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     IR::Value new_coords =
         ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]);
     IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index d374c976a..100437f0e 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -372,6 +372,10 @@ TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAdd
     return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf));
 }
 
+bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) {
+    return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf));
+}
+
 class Descriptors {
 public:
     explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
@@ -403,6 +407,7 @@ public:
         })};
         image_buffer_descriptors[index].is_written |= desc.is_written;
         image_buffer_descriptors[index].is_read |= desc.is_read;
+        image_buffer_descriptors[index].is_integer |= desc.is_integer;
         return index;
     }
 
@@ -432,6 +437,7 @@ public:
         })};
         image_descriptors[index].is_written |= desc.is_written;
         image_descriptors[index].is_read |= desc.is_read;
+        image_descriptors[index].is_integer |= desc.is_integer;
         return index;
     }
 
@@ -469,6 +475,20 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
                         ir.FPRecip(ir.ConvertUToF(32, 32, ir.CompositeExtract(texture_size, 1))))));
 }
 
+bool IsPixelFormatSNorm(TexturePixelFormat pixel_format) {
+    switch (pixel_format) {
+    case TexturePixelFormat::A8B8G8R8_SNORM:
+    case TexturePixelFormat::R8G8_SNORM:
+    case TexturePixelFormat::R8_SNORM:
+    case TexturePixelFormat::R16G16B16A16_SNORM:
+    case TexturePixelFormat::R16G16_SNORM:
+    case TexturePixelFormat::R16_SNORM:
+        return true;
+    default:
+        return false;
+    }
+}
+
 void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_format) {
     const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@@ -587,11 +607,13 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
             }
             const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
             const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite};
+            const bool is_integer{IsTexturePixelFormatInteger(env, cbuf)};
             if (flags.type == TextureType::Buffer) {
                 index = descriptors.Add(ImageBufferDescriptor{
                     .format = flags.image_format,
                     .is_written = is_written,
                     .is_read = is_read,
+                    .is_integer = is_integer,
                     .cbuf_index = cbuf.index,
                     .cbuf_offset = cbuf.offset,
                     .count = cbuf.count,
@@ -603,6 +625,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
                     .format = flags.image_format,
                     .is_written = is_written,
                     .is_read = is_read,
+                    .is_integer = is_integer,
                     .cbuf_index = cbuf.index,
                     .cbuf_offset = cbuf.offset,
                     .count = cbuf.count,
@@ -658,7 +681,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
         if (!host_info.support_snorm_render_buffer && inst->GetOpcode() == IR::Opcode::ImageFetch &&
             flags.type == TextureType::Buffer) {
             const auto pixel_format = ReadTexturePixelFormat(env, cbuf);
-            if (pixel_format != TexturePixelFormat::OTHER) {
+            if (IsPixelFormatSNorm(pixel_format)) {
                 PatchTexelFetch(*texture_inst.block, *texture_inst.inst, pixel_format);
             }
         }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 66901a965..7578d41cc 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -87,6 +87,8 @@ struct Profile {
     bool has_broken_robust{};
 
     u64 min_ssbo_alignment{};
+
+    u32 max_user_clip_distances{};
 };
 
 } // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index b4b4afd37..ed13e6820 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -35,14 +35,109 @@ enum class TextureType : u32 {
 };
 constexpr u32 NUM_TEXTURE_TYPES = 9;
 
-enum class TexturePixelFormat : u32 {
+enum class TexturePixelFormat {
+    A8B8G8R8_UNORM,
     A8B8G8R8_SNORM,
+    A8B8G8R8_SINT,
+    A8B8G8R8_UINT,
+    R5G6B5_UNORM,
+    B5G6R5_UNORM,
+    A1R5G5B5_UNORM,
+    A2B10G10R10_UNORM,
+    A2B10G10R10_UINT,
+    A2R10G10B10_UNORM,
+    A1B5G5R5_UNORM,
+    A5B5G5R1_UNORM,
+    R8_UNORM,
     R8_SNORM,
-    R8G8_SNORM,
+    R8_SINT,
+    R8_UINT,
+    R16G16B16A16_FLOAT,
+    R16G16B16A16_UNORM,
     R16G16B16A16_SNORM,
-    R16G16_SNORM,
+    R16G16B16A16_SINT,
+    R16G16B16A16_UINT,
+    B10G11R11_FLOAT,
+    R32G32B32A32_UINT,
+    BC1_RGBA_UNORM,
+    BC2_UNORM,
+    BC3_UNORM,
+    BC4_UNORM,
+    BC4_SNORM,
+    BC5_UNORM,
+    BC5_SNORM,
+    BC7_UNORM,
+    BC6H_UFLOAT,
+    BC6H_SFLOAT,
+    ASTC_2D_4X4_UNORM,
+    B8G8R8A8_UNORM,
+    R32G32B32A32_FLOAT,
+    R32G32B32A32_SINT,
+    R32G32_FLOAT,
+    R32G32_SINT,
+    R32_FLOAT,
+    R16_FLOAT,
+    R16_UNORM,
     R16_SNORM,
-    OTHER
+    R16_UINT,
+    R16_SINT,
+    R16G16_UNORM,
+    R16G16_FLOAT,
+    R16G16_UINT,
+    R16G16_SINT,
+    R16G16_SNORM,
+    R32G32B32_FLOAT,
+    A8B8G8R8_SRGB,
+    R8G8_UNORM,
+    R8G8_SNORM,
+    R8G8_SINT,
+    R8G8_UINT,
+    R32G32_UINT,
+    R16G16B16X16_FLOAT,
+    R32_UINT,
+    R32_SINT,
+    ASTC_2D_8X8_UNORM,
+    ASTC_2D_8X5_UNORM,
+    ASTC_2D_5X4_UNORM,
+    B8G8R8A8_SRGB,
+    BC1_RGBA_SRGB,
+    BC2_SRGB,
+    BC3_SRGB,
+    BC7_SRGB,
+    A4B4G4R4_UNORM,
+    G4R4_UNORM,
+    ASTC_2D_4X4_SRGB,
+    ASTC_2D_8X8_SRGB,
+    ASTC_2D_8X5_SRGB,
+    ASTC_2D_5X4_SRGB,
+    ASTC_2D_5X5_UNORM,
+    ASTC_2D_5X5_SRGB,
+    ASTC_2D_10X8_UNORM,
+    ASTC_2D_10X8_SRGB,
+    ASTC_2D_6X6_UNORM,
+    ASTC_2D_6X6_SRGB,
+    ASTC_2D_10X6_UNORM,
+    ASTC_2D_10X6_SRGB,
+    ASTC_2D_10X5_UNORM,
+    ASTC_2D_10X5_SRGB,
+    ASTC_2D_10X10_UNORM,
+    ASTC_2D_10X10_SRGB,
+    ASTC_2D_12X10_UNORM,
+    ASTC_2D_12X10_SRGB,
+    ASTC_2D_12X12_UNORM,
+    ASTC_2D_12X12_SRGB,
+    ASTC_2D_8X6_UNORM,
+    ASTC_2D_8X6_SRGB,
+    ASTC_2D_6X5_UNORM,
+    ASTC_2D_6X5_SRGB,
+    E5B9G9R9_FLOAT,
+    D32_FLOAT,
+    D16_UNORM,
+    X8_D24_UNORM,
+    S8_UINT,
+    D24_UNORM_S8_UINT,
+    S8_UINT_D24_UNORM,
+    D32_FLOAT_S8_UINT,
 };
 
 enum class ImageFormat : u32 {
@@ -97,6 +192,7 @@ struct ImageBufferDescriptor {
     ImageFormat format;
     bool is_written;
     bool is_read;
+    bool is_integer;
     u32 cbuf_index;
     u32 cbuf_offset;
     u32 count;
@@ -129,6 +225,7 @@ struct ImageDescriptor {
     ImageFormat format;
     bool is_written;
     bool is_read;
+    bool is_integer;
     u32 cbuf_index;
     u32 cbuf_offset;
     u32 count;
@@ -227,6 +324,8 @@ struct Info {
     bool requires_layer_emulation{};
     IR::Attribute emulated_layer{};
 
+    u32 used_clip_distances{};
+
     boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
         constant_buffer_descriptors;
     boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 592c28ba3..95ba4f76c 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() {
 }
 
 void Maxwell3D::ProcessCounterReset() {
-    switch (regs.clear_report_value) {
-    case Regs::ClearReport::ZPassPixelCount:
-        rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
-        break;
-    default:
-        LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
-        break;
-    }
+    const auto query_type = [clear_report = regs.clear_report_value]() { // register -> type
+        switch (clear_report) {
+        case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount:
+            return VideoCommon::QueryType::ZPassPixelCount64;
+        case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded:
+            return VideoCommon::QueryType::StreamingPrimitivesSucceeded;
+        case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated:
+            return VideoCommon::QueryType::PrimitivesGenerated;
+        case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut:
+            return VideoCommon::QueryType::VtgPrimitivesOut;
+        default:
+            LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report);
+            return VideoCommon::QueryType::Payload; // fallback for unhandled report values
+        }
+    }();
+    rasterizer->ResetCounter(query_type); // called for every value, including the fallback
 }
 
 void Maxwell3D::ProcessSyncPoint() {
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 9fcaeeac7..a64404ce4 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -28,8 +28,11 @@
 namespace VideoCore {
 enum class QueryType {
     SamplesPassed,
+    PrimitivesGenerated,
+    TfbPrimitivesWritten,
+    Count, // sentinel: number of query types above, keep it last
 };
-constexpr std::size_t NumQueryTypes = 1;
+constexpr std::size_t NumQueryTypes = static_cast<std::size_t>(QueryType::Count);
 } // namespace VideoCore
 
 namespace VideoCommon {
@@ -44,15 +47,6 @@ public:
     explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
         : cache{cache_}, type{type_} {}
 
-    /// Updates the state of the stream, enabling or disabling as needed.
-    void Update(bool enabled) {
-        if (enabled) {
-            Enable();
-        } else {
-            Disable();
-        }
-    }
-
     /// Resets the stream to zero. It doesn't disable the query after resetting.
     void Reset() {
         if (current) {
@@ -80,7 +74,6 @@ public:
         return current != nullptr;
     }
 
-private:
     /// Enables the stream.
     void Enable() {
         if (current) {
@@ -97,6 +90,7 @@ private:
         last = std::exchange(current, nullptr);
     }
 
+private:
     QueryCache& cache;
     const VideoCore::QueryType type;
 
@@ -112,8 +106,14 @@ public:
         : rasterizer{rasterizer_},
           // Use reinterpret_cast instead of static_cast as workaround for
           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
-          cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this),
-                                                          VideoCore::QueryType::SamplesPassed}}} {
+          cpu_memory{cpu_memory_}, streams{{
+                                       {CounterStream{reinterpret_cast<QueryCache&>(*this),
+                                                      VideoCore::QueryType::SamplesPassed}},
+                                       {CounterStream{reinterpret_cast<QueryCache&>(*this),
+                                                      VideoCore::QueryType::PrimitivesGenerated}},
+                                       {CounterStream{reinterpret_cast<QueryCache&>(*this),
+                                                      VideoCore::QueryType::TfbPrimitivesWritten}},
+                                   }} {
         (void)slot_async_jobs.insert(); // Null value
     }
 
@@ -157,12 +157,11 @@ public:
         AsyncFlushQuery(query, timestamp, lock);
     }
 
-    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
-    void UpdateCounters() {
+    /// Enables every counter stream in `streams` while holding the cache mutex.
+    void EnableCounters() {
         std::unique_lock lock{mutex};
-        if (maxwell3d) {
-            const auto& regs = maxwell3d->regs;
-            Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable);
+        for (auto& stream : streams) {
+            stream.Enable();
         }
     }
 
@@ -176,7 +175,7 @@ public:
     void DisableStreams() {
         std::unique_lock lock{mutex};
         for (auto& stream : streams) {
-            stream.Update(false);
+            stream.Disable();
         }
     }
 
@@ -353,7 +352,7 @@ private:
 
     std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
     std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
-};
+}; // class QueryCacheLegacy
 
 template <class QueryCache, class HostCounter>
 class HostCounterBase {
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index ec142d48e..fef7360ed 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -18,16 +18,27 @@ namespace OpenGL {
 
 namespace {
 
-constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
-
 constexpr GLenum GetTarget(VideoCore::QueryType type) {
-    return QueryTargets[static_cast<std::size_t>(type)];
+    switch (type) {
+    case VideoCore::QueryType::SamplesPassed:
+        return GL_SAMPLES_PASSED;
+    case VideoCore::QueryType::PrimitivesGenerated:
+        return GL_PRIMITIVES_GENERATED;
+    case VideoCore::QueryType::TfbPrimitivesWritten:
+        return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;
+    default: // e.g. the QueryType::Count sentinel
+        break;
+    }
+    UNIMPLEMENTED_MSG("Query type {}", type);
+    return 0; // no matching GL query target
 }
 
 } // Anonymous namespace
 
 QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
-    : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
+    : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {
+    EnableCounters();
+}
 
 QueryCache::~QueryCache() = default;
 
@@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) {
     auto& stream = cache->Stream(type);
     const bool slice_counter = WaitPending() && stream.IsEnabled();
     if (slice_counter) {
-        stream.Update(false);
+        stream.Disable();
     }
 
     auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
 
     if (slice_counter) {
-        stream.Update(true);
+        stream.Enable();
     }
 
     return result;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 291515e73..7a5fad735 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
 void oglEnable(GLenum cap, bool state) {
     (state ? glEnable : glDisable)(cap);
 }
+
+std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) {
+    switch (type) {
+    case VideoCommon::QueryType::PrimitivesGenerated:
+    case VideoCommon::QueryType::VtgPrimitivesOut: // shares the PrimitivesGenerated counter
+        return VideoCore::QueryType::PrimitivesGenerated;
+    case VideoCommon::QueryType::ZPassPixelCount64:
+        return VideoCore::QueryType::SamplesPassed;
+    case VideoCommon::QueryType::StreamingPrimitivesSucceeded:
+        // case VideoCommon::QueryType::StreamingByteCount:
+        // TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride
+        return VideoCore::QueryType::TfbPrimitivesWritten;
+    default:
+        return std::nullopt; // no query cache counter is mapped to this type
+    }
+}
 } // Anonymous namespace
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -216,7 +232,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
 
     SCOPE_EXIT({ gpu.TickWork(); });
     gpu_memory->FlushCaching();
-    query_cache.UpdateCounters();
 
     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
     if (!pipeline) {
@@ -341,7 +356,6 @@ void RasterizerOpenGL::DrawTexture() {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
 
     SCOPE_EXIT({ gpu.TickWork(); });
-    query_cache.UpdateCounters();
 
     texture_cache.SynchronizeGraphicsDescriptors();
     texture_cache.UpdateRenderTargets(false);
@@ -408,21 +422,28 @@ void RasterizerOpenGL::DispatchCompute() {
 }
 
 void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
-    if (type == VideoCommon::QueryType::ZPassPixelCount64) {
-        query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed);
+    const auto query_cache_type = MaxwellToVideoCoreQuery(type);
+    if (!query_cache_type.has_value()) { // unmapped type: only Payload is accepted silently
+        UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type);
+        return; // nothing to reset without a mapped counter
     }
+    query_cache.ResetCounter(*query_cache_type);
 }
 
 void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
-    if (type == VideoCommon::QueryType::ZPassPixelCount64) {
-        if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
-            query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
-        } else {
-            query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt);
-        }
-        return;
+    const auto query_cache_type = MaxwellToVideoCoreQuery(type);
+    if (!query_cache_type.has_value()) { // no query cache counter is mapped to this type
+        return QueryFallback(gpu_addr, type, flags, payload, subreport);
     }
+    const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout);
+    const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt;
+    query_cache.Query(gpu_addr, *query_cache_type, timestamp); // timestamp set only on HasTimeout
+}
+
+void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+                                     VideoCommon::QueryPropertiesFlags flags, u32 payload,
+                                     u32 subreport) {
     if (type != VideoCommon::QueryType::Payload) {
         payload = 1u;
     }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d28388a9d..ce3460938 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -231,6 +231,9 @@ private:
     /// End a transform feedback
     void EndTransformFeedback();
 
+    void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+                       VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport);
+
     Tegra::GPU& gpu;
 
     const Device& device;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 26f2d0ea7..30df41b7d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines;
 using VideoCommon::SerializePipeline;
 using Context = ShaderContext::Context;
 
-constexpr u32 CACHE_VERSION = 9;
+constexpr u32 CACHE_VERSION = 10;
 
 template <typename Container>
 auto MakeSpan(Container& container) {
@@ -233,6 +233,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .ignore_nan_fp_comparisons = true,
           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
           .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(),
+          .max_user_clip_distances = 8,
       },
       host_info{
           .support_float64 = true,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5958f52f7..3c61799fa 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -78,8 +78,15 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
 }
 } // Anonymous namespace
 
-Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
-    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {
+    if (runtime.device.HasNullDescriptor()) {
+        return; // device is not set here; View() returns VK_NULL_HANDLE when it is null
+    }
+    device = &runtime.device;
+    buffer = runtime.CreateNullBuffer(); // back this null buffer with a real placeholder
+    is_null = true; // makes View() force offset and size to zero
+}
 
 Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
                VAddr cpu_addr_, u64 size_bytes_)
@@ -93,8 +100,12 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast
 
 VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
     if (!device) {
-        // Null buffer, return a null descriptor
+        // Null buffer on a device with null descriptor support: return a null handle
         return VK_NULL_HANDLE;
+    } else if (is_null) {
+        // Null descriptors unsupported: view the placeholder buffer with offset and size zeroed
+        offset = 0;
+        size = 0;
     }
     const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
         return offset == view.offset && size == view.size && format == view.format;
@@ -563,22 +574,27 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
         }
         buffer_handles.push_back(handle);
     }
+    const u32 device_max = device.GetMaxVertexInputBindings();
+    const u32 min_binding = std::min(bindings.min_index, device_max);
+    const u32 max_binding = std::min(bindings.max_index, device_max);
+    const u32 binding_count = max_binding - min_binding;
+    if (binding_count == 0) {
+        return;
+    }
     if (device.IsExtExtendedDynamicStateSupported()) {
-        scheduler.Record([this, bindings_ = std::move(bindings),
-                          buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
-            cmdbuf.BindVertexBuffers2EXT(bindings_.min_index,
-                                         std::min(bindings_.max_index - bindings_.min_index,
-                                                  device.GetMaxVertexInputBindings()),
-                                         buffer_handles_.data(), bindings_.offsets.data(),
-                                         bindings_.sizes.data(), bindings_.strides.data());
+        scheduler.Record([bindings_ = std::move(bindings),
+                          buffer_handles_ = std::move(buffer_handles),
+                          binding_count](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, binding_count, buffer_handles_.data(),
+                                         bindings_.offsets.data(), bindings_.sizes.data(),
+                                         bindings_.strides.data());
         });
     } else {
-        scheduler.Record([this, bindings_ = std::move(bindings),
-                          buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
-            cmdbuf.BindVertexBuffers(bindings_.min_index,
-                                     std::min(bindings_.max_index - bindings_.min_index,
-                                              device.GetMaxVertexInputBindings()),
-                                     buffer_handles_.data(), bindings_.offsets.data());
+        scheduler.Record([bindings_ = std::move(bindings),
+                          buffer_handles_ = std::move(buffer_handles),
+                          binding_count](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindVertexBuffers(bindings_.min_index, binding_count, buffer_handles_.data(),
+                                     bindings_.offsets.data());
         });
     }
 }
@@ -622,9 +638,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
 }
 
 void BufferCacheRuntime::ReserveNullBuffer() {
-    if (null_buffer) {
-        return;
+    if (!null_buffer) {
+        null_buffer = CreateNullBuffer(); // created only while no null buffer exists yet
     }
+}
+
+vk::Buffer BufferCacheRuntime::CreateNullBuffer() {
     VkBufferCreateInfo create_info{
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
@@ -639,15 +658,17 @@ void BufferCacheRuntime::ReserveNullBuffer() {
     if (device.IsExtTransformFeedbackSupported()) {
         create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
     }
-    null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
+    vk::Buffer ret = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
     if (device.HasDebuggingToolAttached()) {
-        null_buffer.SetObjectNameEXT("Null buffer");
+        ret.SetObjectNameEXT("Null buffer");
     }
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([buffer = *ret](vk::CommandBuffer cmdbuf) {
         cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
     });
+
+    return ret;
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 0b3fbd6d0..dc300d7cb 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -63,6 +63,7 @@ private:
     vk::Buffer buffer;
     std::vector<BufferView> views;
     VideoCommon::UsageTracker tracker;
+    bool is_null{};
 };
 
 class QuadArrayIndexBuffer;
@@ -151,6 +152,7 @@ private:
     }
 
     void ReserveNullBuffer();
+    vk::Buffer CreateNullBuffer();
 
     const Device& device;
     MemoryAllocator& memory_allocator;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 2a13b2a72..d1841198d 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
 using VideoCommon::GenericEnvironment;
 using VideoCommon::GraphicsEnvironment;
 
-constexpr u32 CACHE_VERSION = 10;
+constexpr u32 CACHE_VERSION = 11;
 constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'};
 
 template <typename Container>
@@ -374,6 +374,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
         .has_broken_robust =
             device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
         .min_ssbo_alignment = device.GetStorageBufferAlignment(),
+        .max_user_clip_distances = device.GetMaxUserClipDistances(),
     };
 
     host_info = Shader::HostTranslateInfo{
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 078777cdd..95954ade7 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -289,12 +289,15 @@ public:
         }
 
         if (has_multi_queries) {
-            size_t intermediary_buffer_index = ObtainBuffer<false>(num_slots_used);
+            const size_t min_accumulation_limit =
+                std::min(first_accumulation_checkpoint, num_slots_used);
+            const size_t max_accumulation_limit =
+                std::max(last_accumulation_checkpoint, num_slots_used);
+            const size_t intermediary_buffer_index = ObtainBuffer<false>(num_slots_used);
             resolve_buffers.push_back(intermediary_buffer_index);
             queries_prefix_scan_pass->Run(*accumulation_buffer, *buffers[intermediary_buffer_index],
                                           *buffers[resolve_buffer_index], num_slots_used,
-                                          std::min(first_accumulation_checkpoint, num_slots_used),
-                                          last_accumulation_checkpoint);
+                                          min_accumulation_limit, max_accumulation_limit);
 
         } else {
             scheduler.RequestOutsideRenderPassOperationContext();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 59829c88b..241fc34be 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() {
 }
 
 void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
+    if (type != VideoCommon::QueryType::ZPassPixelCount64) {
+        LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type);
+        return; // only ZPassPixelCount64 resets are forwarded to the query cache
+    }
     query_cache.CounterReset(type);
 }
 
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 4edbe5700..492440ac4 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -62,23 +62,9 @@ static Shader::TextureType ConvertTextureType(const Tegra::Texture::TICEntry& en
 }
 
 static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture::TICEntry& entry) {
-    switch (PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, entry.b_type,
-                                       entry.a_type, entry.srgb_conversion)) {
-    case VideoCore::Surface::PixelFormat::A8B8G8R8_SNORM:
-        return Shader::TexturePixelFormat::A8B8G8R8_SNORM;
-    case VideoCore::Surface::PixelFormat::R8_SNORM:
-        return Shader::TexturePixelFormat::R8_SNORM;
-    case VideoCore::Surface::PixelFormat::R8G8_SNORM:
-        return Shader::TexturePixelFormat::R8G8_SNORM;
-    case VideoCore::Surface::PixelFormat::R16G16B16A16_SNORM:
-        return Shader::TexturePixelFormat::R16G16B16A16_SNORM;
-    case VideoCore::Surface::PixelFormat::R16G16_SNORM:
-        return Shader::TexturePixelFormat::R16G16_SNORM;
-    case VideoCore::Surface::PixelFormat::R16_SNORM:
-        return Shader::TexturePixelFormat::R16_SNORM;
-    default:
-        return Shader::TexturePixelFormat::OTHER;
-    }
+    return static_cast<Shader::TexturePixelFormat>( // assumes both enums share one ordering
+        PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, entry.b_type,
+                                   entry.a_type, entry.srgb_conversion));
 }
 
 static std::string_view StageToPrefix(Shader::Stage stage) {
@@ -398,6 +384,11 @@ Shader::TexturePixelFormat GraphicsEnvironment::ReadTexturePixelFormat(u32 handl
     return result;
 }
 
+bool GraphicsEnvironment::IsTexturePixelFormatInteger(u32 handle) {
+    return VideoCore::Surface::IsPixelFormatInteger( // cast assumes both enums share ordering
+        static_cast<VideoCore::Surface::PixelFormat>(ReadTexturePixelFormat(handle)));
+}
+
 u32 GraphicsEnvironment::ReadViewportTransformState() {
     const auto& regs{maxwell3d->regs};
     viewport_transform_state = regs.viewport_scale_offset_enabled;
@@ -448,6 +439,11 @@ Shader::TexturePixelFormat ComputeEnvironment::ReadTexturePixelFormat(u32 handle
     return result;
 }
 
+bool ComputeEnvironment::IsTexturePixelFormatInteger(u32 handle) {
+    return VideoCore::Surface::IsPixelFormatInteger( // cast assumes both enums share ordering
+        static_cast<VideoCore::Surface::PixelFormat>(ReadTexturePixelFormat(handle)));
+}
+
 u32 ComputeEnvironment::ReadViewportTransformState() {
     return viewport_transform_state;
 }
@@ -551,6 +547,11 @@ Shader::TexturePixelFormat FileEnvironment::ReadTexturePixelFormat(u32 handle) {
     return it->second;
 }
 
+bool FileEnvironment::IsTexturePixelFormatInteger(u32 handle) {
+    return VideoCore::Surface::IsPixelFormatInteger( // cast assumes both enums share ordering
+        static_cast<VideoCore::Surface::PixelFormat>(ReadTexturePixelFormat(handle)));
+}
+
 u32 FileEnvironment::ReadViewportTransformState() {
     return viewport_transform_state;
 }
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index b90f3d44e..6b372e336 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -115,6 +115,8 @@ public:
 
     Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
 
+    bool IsTexturePixelFormatInteger(u32 handle) override;
+
     u32 ReadViewportTransformState() override;
 
     std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override;
@@ -139,6 +141,8 @@ public:
 
     Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
 
+    bool IsTexturePixelFormatInteger(u32 handle) override;
+
     u32 ReadViewportTransformState() override;
 
     std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(
@@ -171,6 +175,8 @@ public:
 
     [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) override;
 
+    [[nodiscard]] bool IsTexturePixelFormatInteger(u32 handle) override;
+
     [[nodiscard]] u32 ReadViewportTransformState() override;
 
     [[nodiscard]] u32 LocalMemorySize() const override;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 1fda0042d..a6fbca69e 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -695,6 +695,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
             std::min(properties.properties.limits.maxVertexInputBindings, 16U);
     }
 
+    if (is_turnip) {
+        LOG_WARNING(Render_Vulkan, "Turnip requires higher-than-reported binding limits");
+        properties.properties.limits.maxVertexInputBindings = 32;
+    }
+
     if (!extensions.extended_dynamic_state && extensions.extended_dynamic_state2) {
         LOG_INFO(Render_Vulkan,
                  "Removing extendedDynamicState2 due to missing extendedDynamicState");
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 4f3846345..701817086 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -665,6 +665,10 @@ public:
         return properties.properties.limits.maxViewports;
     }
 
+    u32 GetMaxUserClipDistances() const {
+        return properties.properties.limits.maxClipDistances;
+    }
+
     bool SupportsConditionalBarriers() const {
         return supports_conditional_barriers;
     }
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 70cf14afa..2f78b8af0 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -377,6 +377,8 @@ const char* ToString(VkResult result) noexcept {
         return "VK_OPERATION_DEFERRED_KHR";
     case VkResult::VK_OPERATION_NOT_DEFERRED_KHR:
         return "VK_OPERATION_NOT_DEFERRED_KHR";
+    case VkResult::VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR:
+        return "VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR";
     case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT:
         return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
     case VkResult::VK_RESULT_MAX_ENUM:
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index fd6bebf0f..0836bcb7e 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -106,32 +106,30 @@ ConfigureGraphics::ConfigureGraphics(
                                                 Settings::values.bg_green.GetValue(),
                                                 Settings::values.bg_blue.GetValue()));
     UpdateAPILayout();
-    PopulateVSyncModeSelection(); //< must happen after UpdateAPILayout
+    PopulateVSyncModeSelection(false); //< must happen after UpdateAPILayout
 
     // VSync setting needs to be determined after populating the VSync combobox
-    if (Settings::IsConfiguringGlobal()) {
-        const auto vsync_mode_setting = Settings::values.vsync_mode.GetValue();
-        const auto vsync_mode = VSyncSettingToMode(vsync_mode_setting);
-        int index{};
-        for (const auto mode : vsync_mode_combobox_enum_map) {
-            if (mode == vsync_mode) {
-                break;
-            }
-            index++;
-        }
-        if (static_cast<unsigned long>(index) < vsync_mode_combobox_enum_map.size()) {
-            vsync_mode_combobox->setCurrentIndex(index);
+    const auto vsync_mode_setting = Settings::values.vsync_mode.GetValue();
+    const auto vsync_mode = VSyncSettingToMode(vsync_mode_setting);
+    int index{};
+    for (const auto mode : vsync_mode_combobox_enum_map) {
+        if (mode == vsync_mode) {
+            break;
         }
+        index++;
+    }
+    if (static_cast<unsigned long>(index) < vsync_mode_combobox_enum_map.size()) {
+        vsync_mode_combobox->setCurrentIndex(index);
     }
 
     connect(api_combobox, qOverload<int>(&QComboBox::activated), this, [this] {
         UpdateAPILayout();
-        PopulateVSyncModeSelection();
+        PopulateVSyncModeSelection(false);
     });
     connect(vulkan_device_combobox, qOverload<int>(&QComboBox::activated), this,
             [this](int device) {
                 UpdateDeviceSelection(device);
-                PopulateVSyncModeSelection();
+                PopulateVSyncModeSelection(false);
             });
     connect(shader_backend_combobox, qOverload<int>(&QComboBox::activated), this,
             [this](int backend) { UpdateShaderBackendSelection(backend); });
@@ -147,8 +145,9 @@ ConfigureGraphics::ConfigureGraphics(
     const auto& update_screenshot_info = [this, &builder]() {
         const auto& combobox_enumerations = builder.ComboboxTranslations().at(
             Settings::EnumMetadata<Settings::AspectRatio>::Index());
-        const auto index = aspect_ratio_combobox->currentIndex();
-        const auto ratio = static_cast<Settings::AspectRatio>(combobox_enumerations[index].first);
+        const auto ratio_index = aspect_ratio_combobox->currentIndex();
+        const auto ratio =
+            static_cast<Settings::AspectRatio>(combobox_enumerations[ratio_index].first);
 
         const auto& combobox_enumerations_resolution = builder.ComboboxTranslations().at(
             Settings::EnumMetadata<Settings::ResolutionSetup>::Index());
@@ -174,11 +173,7 @@ ConfigureGraphics::ConfigureGraphics(
     }
 }
 
-void ConfigureGraphics::PopulateVSyncModeSelection() {
-    if (!Settings::IsConfiguringGlobal()) {
-        return;
-    }
-
+void ConfigureGraphics::PopulateVSyncModeSelection(bool use_setting) {
     const Settings::RendererBackend backend{GetCurrentGraphicsBackend()};
     if (backend == Settings::RendererBackend::Null) {
         vsync_mode_combobox->setEnabled(false);
@@ -189,8 +184,9 @@ void ConfigureGraphics::PopulateVSyncModeSelection() {
     const int current_index = //< current selected vsync mode from combobox
         vsync_mode_combobox->currentIndex();
     const auto current_mode = //< current selected vsync mode as a VkPresentModeKHR
-        current_index == -1 ? VSyncSettingToMode(Settings::values.vsync_mode.GetValue())
-                            : vsync_mode_combobox_enum_map[current_index];
+        current_index == -1 || use_setting
+            ? VSyncSettingToMode(Settings::values.vsync_mode.GetValue())
+            : vsync_mode_combobox_enum_map[current_index];
     int index{};
     const int device{vulkan_device_combobox->currentIndex()}; //< current selected Vulkan device
 
@@ -214,6 +210,31 @@ void ConfigureGraphics::PopulateVSyncModeSelection() {
         }
         index++;
     }
+
+    if (!Settings::IsConfiguringGlobal()) {
+        vsync_restore_global_button->setVisible(!Settings::values.vsync_mode.UsingGlobal());
+
+        const Settings::VSyncMode global_vsync_mode = Settings::values.vsync_mode.GetValue(true);
+        vsync_restore_global_button->setEnabled(
+            (backend == Settings::RendererBackend::OpenGL &&
+             (global_vsync_mode == Settings::VSyncMode::Immediate ||
+              global_vsync_mode == Settings::VSyncMode::Fifo)) ||
+            backend == Settings::RendererBackend::Vulkan);
+    }
+}
+
+// Writes the vsync mode selected in the combobox to Settings::values.vsync_mode.
+void ConfigureGraphics::UpdateVsyncSetting() const {
+    // currentIndex() is -1 while nothing is selected; indexing the mode map with it would be out
+    // of bounds, so leave the setting untouched in that case.
+    const int index = vsync_mode_combobox->currentIndex();
+    if (index < 0 || static_cast<unsigned long>(index) >= vsync_mode_combobox_enum_map.size()) {
+        return;
+    }
+
+    const auto mode = vsync_mode_combobox_enum_map[index];
+    const auto vsync_mode = PresentModeToSetting(mode);
+    Settings::values.vsync_mode.SetValue(vsync_mode);
 }
 
 void ConfigureGraphics::UpdateDeviceSelection(int device) {
@@ -299,6 +312,33 @@ void ConfigureGraphics::Setup(const ConfigurationShared::Builder& builder) {
         } else if (setting->Id() == Settings::values.vsync_mode.Id()) {
             // Keep track of vsync_mode's combobox so we can populate it
             vsync_mode_combobox = widget->combobox;
+
+            // Since vsync is populated at runtime, we have to manually set up the button for
+            // restoring the global setting.
+            if (!Settings::IsConfiguringGlobal()) {
+                QPushButton* restore_button =
+                    ConfigurationShared::Widget::CreateRestoreGlobalButton(
+                        Settings::values.vsync_mode.UsingGlobal(), widget);
+                restore_button->setEnabled(true);
+                widget->layout()->addWidget(restore_button);
+
+                QObject::connect(restore_button, &QAbstractButton::clicked,
+                                 [restore_button, this](bool) {
+                                     Settings::values.vsync_mode.SetGlobal(true);
+                                     PopulateVSyncModeSelection(true);
+
+                                     restore_button->setVisible(false);
+                                 });
+
+                std::function<void()> set_non_global = [restore_button, this]() {
+                    Settings::values.vsync_mode.SetGlobal(false);
+                    UpdateVsyncSetting();
+                    restore_button->setVisible(true);
+                };
+                QObject::connect(widget->combobox, QOverload<int>::of(&QComboBox::activated),
+                                 [set_non_global]() { set_non_global(); });
+                vsync_restore_global_button = restore_button;
+            }
             hold_graphics.emplace(setting->Id(), widget);
         } else if (setting->Id() == Settings::values.aspect_ratio.Id()) {
             // Keep track of the aspect ratio combobox to update other UI tabs that need it
@@ -400,11 +440,7 @@ void ConfigureGraphics::ApplyConfiguration() {
         func(powered_on);
     }
 
-    if (Settings::IsConfiguringGlobal()) {
-        const auto mode = vsync_mode_combobox_enum_map[vsync_mode_combobox->currentIndex()];
-        const auto vsync_mode = PresentModeToSetting(mode);
-        Settings::values.vsync_mode.SetValue(vsync_mode);
-    }
+    UpdateVsyncSetting();
 
     Settings::values.vulkan_device.SetGlobal(true);
     Settings::values.shader_backend.SetGlobal(true);
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index 9c24a56db..5c8286836 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -62,7 +62,8 @@ private:
 
     void Setup(const ConfigurationShared::Builder& builder);
 
-    void PopulateVSyncModeSelection();
+    void PopulateVSyncModeSelection(bool use_setting);
+    void UpdateVsyncSetting() const;
     void UpdateBackgroundColorButton(QColor color);
     void UpdateAPILayout();
     void UpdateDeviceSelection(int device);
@@ -104,6 +105,7 @@ private:
     QComboBox* api_combobox;
     QComboBox* shader_backend_combobox;
     QComboBox* vsync_mode_combobox;
+    QPushButton* vsync_restore_global_button;
     QWidget* vulkan_device_widget;
     QWidget* api_widget;
     QWidget* shader_backend_widget;