56 files changed, 713 insertions, 230 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d1ad55c9c..d44d67562 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -229,7 +229,7 @@ elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR YUZU_USE_BUNDLED_BOOST)
     include_directories(SYSTEM "${Boost_INCLUDE_DIRS}")
 else()
     message(STATUS "Boost 1.73.0 or newer not found, falling back to Conan")
-    list(APPEND CONAN_REQUIRED_LIBS "boost/1.73.0")
+    list(APPEND CONAN_REQUIRED_LIBS "boost/1.78.0")
 endif()
 
 # Attempt to locate any packages that are required and report the missing ones in CONAN_REQUIRED_LIBS
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject cce7e4ee5d7b07a4609c73c053fbf57dc8c7845
+Subproject 28714ee75aa079cbb706e38bdabc8ee1f6c6951
diff --git a/src/common/settings.h b/src/common/settings.h
index 313f1fa7f..d01c0448c 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -597,6 +597,7 @@ struct Values {
     BasicSetting<std::string> program_args{std::string(), "program_args"};
     BasicSetting<bool> dump_exefs{false, "dump_exefs"};
     BasicSetting<bool> dump_nso{false, "dump_nso"};
+    BasicSetting<bool> dump_shaders{false, "dump_shaders"};
     BasicSetting<bool> enable_fs_access_log{false, "enable_fs_access_log"};
     BasicSetting<bool> reporting_services{false, "reporting_services"};
     BasicSetting<bool> quest_flag{false, "quest_flag"};
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 698c4f912..b1a746727 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -187,6 +187,8 @@ add_library(core STATIC
     hle/kernel/k_event.h
     hle/kernel/k_handle_table.cpp
     hle/kernel/k_handle_table.h
+    hle/kernel/k_interrupt_manager.cpp
+    hle/kernel/k_interrupt_manager.h
     hle/kernel/k_light_condition_variable.cpp
     hle/kernel/k_light_condition_variable.h
     hle/kernel/k_light_lock.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index aa96f709b..3f9a7f44b 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -317,6 +317,8 @@ struct System::Impl {
         is_powered_on = false;
         exit_lock = false;
 
+        gpu_core->NotifyShutdown();
+
         services.reset();
         service_manager.reset();
         cheat_engine.reset();
diff --git a/src/core/frontend/applets/controller.cpp b/src/core/frontend/applets/controller.cpp
index 6dbd38ffa..e1033b634 100644
--- a/src/core/frontend/applets/controller.cpp
+++ b/src/core/frontend/applets/controller.cpp
@@ -45,26 +45,26 @@ void DefaultControllerApplet::ReconfigureControllers(std::function<void()> callb
         // Pro Controller -> Dual Joycons -> Left Joycon/Right Joycon -> Handheld
         if (parameters.allow_pro_controller) {
             controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::ProController);
-            controller->Connect();
+            controller->Connect(true);
         } else if (parameters.allow_dual_joycons) {
             controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconDual);
-            controller->Connect();
+            controller->Connect(true);
         } else if (parameters.allow_left_joycon && parameters.allow_right_joycon) {
             // Assign left joycons to even player indices and right joycons to odd player indices.
             // We do this since Captain Toad Treasure Tracker expects a left joycon for Player 1 and
             // a right Joycon for Player 2 in 2 Player Assist mode.
             if (index % 2 == 0) {
                 controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconLeft);
-                controller->Connect();
+                controller->Connect(true);
             } else {
                 controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconRight);
-                controller->Connect();
+                controller->Connect(true);
             }
         } else if (index == 0 && parameters.enable_single_mode && parameters.allow_handheld &&
                    !Settings::values.use_docked_mode.GetValue()) {
             // We should *never* reach here under any normal circumstances.
             controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::Handheld);
-            controller->Connect();
+            controller->Connect(true);
         } else {
             UNREACHABLE_MSG("Unable to add a new controller based on the given parameters!");
         }
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp
index ff9d7a7e3..71fc05807 100644
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -879,15 +879,42 @@ void EmulatedController::SetSupportedNpadStyleTag(NpadStyleTag supported_styles)
     if (!is_connected) {
         return;
     }
-    if (!IsControllerSupported()) {
-        LOG_ERROR(Service_HID, "Controller type {} is not supported. Disconnecting controller",
-                  npad_type);
-        Disconnect();
+    if (IsControllerSupported()) {
+        return;
+    }
+
+    Disconnect();
+
+    // Fallback fullkey controllers to Pro controllers
+    if (IsControllerFullkey() && supported_style_tag.fullkey) {
+        LOG_WARNING(Service_HID, "Reconnecting controller type {} as Pro controller", npad_type);
+        SetNpadStyleIndex(NpadStyleIndex::ProController);
+        Connect();
+        return;
+    }
+
+    LOG_ERROR(Service_HID, "Controller type {} is not supported. Disconnecting controller",
+              npad_type);
+}
+
+bool EmulatedController::IsControllerFullkey(bool use_temporary_value) const {
+    const auto type = is_configuring && use_temporary_value ? tmp_npad_type : npad_type;
+    switch (type) {
+    case NpadStyleIndex::ProController:
+    case NpadStyleIndex::GameCube:
+    case NpadStyleIndex::NES:
+    case NpadStyleIndex::SNES:
+    case NpadStyleIndex::N64:
+    case NpadStyleIndex::SegaGenesis:
+        return true;
+    default:
+        return false;
     }
 }
 
-bool EmulatedController::IsControllerSupported() const {
-    switch (npad_type) {
+bool EmulatedController::IsControllerSupported(bool use_temporary_value) const {
+    const auto type = is_configuring && use_temporary_value ? tmp_npad_type : npad_type;
+    switch (type) {
     case NpadStyleIndex::ProController:
         return supported_style_tag.fullkey;
     case NpadStyleIndex::Handheld:
@@ -915,9 +942,10 @@ bool EmulatedController::IsControllerSupported() const {
     }
 }
 
-void EmulatedController::Connect() {
-    if (!IsControllerSupported()) {
-        LOG_ERROR(Service_HID, "Controller type {} is not supported", npad_type);
+void EmulatedController::Connect(bool use_temporary_value) {
+    if (!IsControllerSupported(use_temporary_value)) {
+        const auto type = is_configuring && use_temporary_value ? tmp_npad_type : npad_type;
+        LOG_ERROR(Service_HID, "Controller type {} is not supported", type);
         return;
     }
     {
diff --git a/src/core/hid/emulated_controller.h b/src/core/hid/emulated_controller.h
index e42aafebc..c0994ab4d 100644
--- a/src/core/hid/emulated_controller.h
+++ b/src/core/hid/emulated_controller.h
@@ -167,8 +167,11 @@ public:
      */
     void SetSupportedNpadStyleTag(NpadStyleTag supported_styles);
 
-    /// Sets the connected status to true
-    void Connect();
+    /**
+     * Sets the connected status to true
+     * @param use_temporary_value If true tmp_npad_type will be used
+     */
+    void Connect(bool use_temporary_value = false);
 
     /// Sets the connected status to false
     void Disconnect();
@@ -318,10 +321,17 @@ private:
     void LoadTASParams();
 
     /**
+     * @param use_temporary_value If true tmp_npad_type will be used
+     * @return true if the controller style is fullkey
+     */
+    bool IsControllerFullkey(bool use_temporary_value = false) const;
+
+    /**
      * Checks the current controller type against the supported_style_tag
+     * @param use_temporary_value If true tmp_npad_type will be used
      * @return true if the controller is supported
      */
-    bool IsControllerSupported() const;
+    bool IsControllerSupported(bool use_temporary_value = false) const;
 
     /**
      * Updates the button status of the controller
diff --git a/src/core/hle/kernel/global_scheduler_context.cpp b/src/core/hle/kernel/global_scheduler_context.cpp
index 4f4e338e3..baad2c5d6 100644
--- a/src/core/hle/kernel/global_scheduler_context.cpp
+++ b/src/core/hle/kernel/global_scheduler_context.cpp
@@ -9,6 +9,7 @@
 #include "core/hle/kernel/global_scheduler_context.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/physical_core.h"
 
 namespace Kernel {
 
@@ -42,6 +43,11 @@ void GlobalSchedulerContext::PreemptThreads() {
     for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
         const u32 priority = preemption_priorities[core_id];
         kernel.Scheduler(core_id).RotateScheduledQueue(core_id, priority);
+
+        // Signal an interrupt occurred. For core 3, this is a certainty, as preemption will result
+        // in the rotator thread being scheduled. For cores 0-2, this is to simulate or system
+        // interrupts that may have occurred.
+        kernel.PhysicalCore(core_id).Interrupt();
     }
 }
 
diff --git a/src/core/hle/kernel/k_interrupt_manager.cpp b/src/core/hle/kernel/k_interrupt_manager.cpp
new file mode 100644
index 000000000..e5dd39751
--- /dev/null
+++ b/src/core/hle/kernel/k_interrupt_manager.cpp
@@ -0,0 +1,34 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/kernel/k_interrupt_manager.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_thread.h"
+#include "core/hle/kernel/kernel.h"
+
+namespace Kernel::KInterruptManager {
+
+void HandleInterrupt(KernelCore& kernel, s32 core_id) {
+    auto* process = kernel.CurrentProcess();
+    if (!process) {
+        return;
+    }
+
+    auto& scheduler = kernel.Scheduler(core_id);
+    auto& current_thread = *scheduler.GetCurrentThread();
+
+    // If the user disable count is set, we may need to pin the current thread.
+    if (current_thread.GetUserDisableCount() && !process->GetPinnedThread(core_id)) {
+        KScopedSchedulerLock sl{kernel};
+
+        // Pin the current thread.
+        process->PinCurrentThread(core_id);
+
+        // Set the interrupt flag for the thread.
+        scheduler.GetCurrentThread()->SetInterruptFlag();
+    }
+}
+
+} // namespace Kernel::KInterruptManager
diff --git a/src/core/hle/kernel/k_interrupt_manager.h b/src/core/hle/kernel/k_interrupt_manager.h
new file mode 100644
index 000000000..05924801e
--- /dev/null
+++ b/src/core/hle/kernel/k_interrupt_manager.h
@@ -0,0 +1,17 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Kernel {
+
+class KernelCore;
+
+namespace KInterruptManager {
+void HandleInterrupt(KernelCore& kernel, s32 core_id);
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_memory_block.h b/src/core/hle/kernel/k_memory_block.h
index fd491146f..9e51c33ce 100644
--- a/src/core/hle/kernel/k_memory_block.h
+++ b/src/core/hle/kernel/k_memory_block.h
@@ -120,7 +120,7 @@ static_assert(static_cast<u32>(KMemoryState::CodeOut) == 0x00402015);
 
 enum class KMemoryPermission : u8 {
     None = 0,
-    Mask = static_cast<u8>(~None),
+    All = static_cast<u8>(~None),
 
     Read = 1 << 0,
     Write = 1 << 1,
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index f2f88c147..4da509224 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -264,9 +264,9 @@ ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_
     ASSERT(heap_last < stack_start || stack_last < heap_start);
     ASSERT(heap_last < kmap_start || kmap_last < heap_start);
 
-    current_heap_addr = heap_region_start;
-    heap_capacity = 0;
-    physical_memory_usage = 0;
+    current_heap_end = heap_region_start;
+    max_heap_size = 0;
+    mapped_physical_memory_size = 0;
     memory_pool = pool;
 
     page_table_impl.Resize(address_space_width, PageBits);
@@ -306,7 +306,7 @@ ResultCode KPageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std:
     KMemoryState state{};
     KMemoryPermission perm{};
     CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, src_addr, size, KMemoryState::All,
-                                  KMemoryState::Normal, KMemoryPermission::Mask,
+                                  KMemoryState::Normal, KMemoryPermission::All,
                                   KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask,
                                   KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
 
@@ -465,7 +465,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
 
     MapPhysicalMemory(page_linked_list, addr, end_addr);
 
-    physical_memory_usage += remaining_size;
+    mapped_physical_memory_size += remaining_size;
 
     const std::size_t num_pages{size / PageSize};
     block_manager->Update(addr, num_pages, KMemoryState::Free, KMemoryPermission::None,
@@ -507,7 +507,7 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
 
     auto process{system.Kernel().CurrentProcess()};
     process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
-    physical_memory_usage -= mapped_size;
+    mapped_physical_memory_size -= mapped_size;
 
     return ResultSuccess;
 }
@@ -554,7 +554,7 @@ ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     KMemoryState src_state{};
     CASCADE_CODE(CheckMemoryState(
         &src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias,
-        KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::ReadAndWrite,
+        KMemoryState::FlagCanAlias, KMemoryPermission::All, KMemoryPermission::ReadAndWrite,
         KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
 
     if (IsRegionMapped(dst_addr, size)) {
@@ -593,7 +593,7 @@ ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     KMemoryState src_state{};
     CASCADE_CODE(CheckMemoryState(
         &src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias,
-        KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::None,
+        KMemoryState::FlagCanAlias, KMemoryPermission::All, KMemoryPermission::None,
         KMemoryAttribute::Mask, KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
 
     KMemoryPermission dst_perm{};
@@ -784,7 +784,7 @@ ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemo
     CASCADE_CODE(CheckMemoryState(
         &state, nullptr, &attribute, addr, size,
         KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
-        KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::Mask,
+        KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::All,
         KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask, KMemoryAttribute::None,
         KMemoryAttribute::IpcAndDeviceMapped));
 
@@ -859,61 +859,125 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryA
     return ResultSuccess;
 }
 
-ResultCode KPageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
+ResultCode KPageTable::SetMaxHeapSize(std::size_t size) {
+    // Lock the table.
     std::lock_guard lock{page_table_lock};
-    heap_capacity = new_heap_capacity;
-    return ResultSuccess;
-}
 
-ResultVal<VAddr> KPageTable::SetHeapSize(std::size_t size) {
+    // Only process page tables are allowed to set heap size.
+    ASSERT(!this->IsKernel());
 
-    if (size > heap_region_end - heap_region_start) {
-        return ResultOutOfMemory;
-    }
+    max_heap_size = size;
 
-    const u64 previous_heap_size{GetHeapSize()};
-
-    UNIMPLEMENTED_IF_MSG(previous_heap_size > size, "Heap shrink is unimplemented");
+    return ResultSuccess;
+}
 
-    // Increase the heap size
+ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
+    // Try to perform a reduction in heap, instead of an extension.
+    VAddr cur_address{};
+    std::size_t allocation_size{};
     {
-        std::lock_guard lock{page_table_lock};
-
-        const u64 delta{size - previous_heap_size};
-
-        // Reserve memory for the heap extension.
-        KScopedResourceReservation memory_reservation(
-            system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
-            delta);
-
-        if (!memory_reservation.Succeeded()) {
-            LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta);
-            return ResultLimitReached;
+        // Lock the table.
+        std::lock_guard lk(page_table_lock);
+
+        // Validate that setting heap size is possible at all.
+        R_UNLESS(!is_kernel, ResultOutOfMemory);
+        R_UNLESS(size <= static_cast<std::size_t>(heap_region_end - heap_region_start),
+                 ResultOutOfMemory);
+        R_UNLESS(size <= max_heap_size, ResultOutOfMemory);
+
+        if (size < GetHeapSize()) {
+            // The size being requested is less than the current size, so we need to free the end of
+            // the heap.
+
+            // Validate memory state.
+            std::size_t num_allocator_blocks;
+            R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks),
+                                         heap_region_start + size, GetHeapSize() - size,
+                                         KMemoryState::All, KMemoryState::Normal,
+                                         KMemoryPermission::All, KMemoryPermission::ReadAndWrite,
+                                         KMemoryAttribute::All, KMemoryAttribute::None));
+
+            // Unmap the end of the heap.
+            const auto num_pages = (GetHeapSize() - size) / PageSize;
+            R_TRY(Operate(heap_region_start + size, num_pages, KMemoryPermission::None,
+                          OperationType::Unmap));
+
+            // Release the memory from the resource limit.
+            system.Kernel().CurrentProcess()->GetResourceLimit()->Release(
+                LimitableResource::PhysicalMemory, num_pages * PageSize);
+
+            // Apply the memory block update.
+            block_manager->Update(heap_region_start + size, num_pages, KMemoryState::Free,
+                                  KMemoryPermission::None, KMemoryAttribute::None);
+
+            // Update the current heap end.
+            current_heap_end = heap_region_start + size;
+
+            // Set the output.
+            *out = heap_region_start;
+            return ResultSuccess;
+        } else if (size == GetHeapSize()) {
+            // The size requested is exactly the current size.
+            *out = heap_region_start;
+            return ResultSuccess;
+        } else {
+            // We have to allocate memory. Determine how much to allocate and where while the table
+            // is locked.
+            cur_address = current_heap_end;
+            allocation_size = size - GetHeapSize();
         }
+    }
 
-        KPageLinkedList page_linked_list;
-        const std::size_t num_pages{delta / PageSize};
+    // Reserve memory for the heap extension.
+    KScopedResourceReservation memory_reservation(
+        system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+        allocation_size);
+    R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
 
-        CASCADE_CODE(
-            system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));
+    // Allocate pages for the heap extension.
+    KPageLinkedList page_linked_list;
+    R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, allocation_size / PageSize,
+                                                   memory_pool));
 
-        if (IsRegionMapped(current_heap_addr, delta)) {
-            return ResultInvalidCurrentMemory;
+    // Map the pages.
+    {
+        // Lock the table.
+        std::lock_guard lk(page_table_lock);
+
+        // Ensure that the heap hasn't changed since we began executing.
+        ASSERT(cur_address == current_heap_end);
+
+        // Check the memory state.
+        std::size_t num_allocator_blocks{};
+        R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks), current_heap_end,
+                                     allocation_size, KMemoryState::All, KMemoryState::Free,
+                                     KMemoryPermission::None, KMemoryPermission::None,
+                                     KMemoryAttribute::None, KMemoryAttribute::None));
+
+        // Map the pages.
+        const auto num_pages = allocation_size / PageSize;
+        R_TRY(Operate(current_heap_end, num_pages, page_linked_list, OperationType::MapGroup));
+
+        // Clear all the newly allocated pages.
+        for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) {
+            std::memset(system.Memory().GetPointer(current_heap_end + (cur_page * PageSize)), 0,
+                        PageSize);
         }
 
-        CASCADE_CODE(
-            Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup));
-
-        // Succeeded in allocation, commit the resource reservation
+        // We succeeded, so commit our memory reservation.
         memory_reservation.Commit();
 
-        block_manager->Update(current_heap_addr, num_pages, KMemoryState::Normal,
-                              KMemoryPermission::ReadAndWrite);
+        // Apply the memory block update.
+        block_manager->Update(current_heap_end, num_pages, KMemoryState::Normal,
+                              KMemoryPermission::ReadAndWrite, KMemoryAttribute::None);
 
-        current_heap_addr = heap_region_start + size;
-    }
+        // Update the current heap end.
+        current_heap_end = heap_region_start + size;
 
-    return heap_region_start;
+        // Set the output.
+        *out = heap_region_start;
+        return ResultSuccess;
+    }
 }
 
 ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
@@ -1005,7 +1069,7 @@ ResultCode KPageTable::LockForCodeMemory(VAddr addr, std::size_t size) {
 
     if (const ResultCode result{CheckMemoryState(
             nullptr, &old_perm, nullptr, addr, size, KMemoryState::FlagCanCodeMemory,
-            KMemoryState::FlagCanCodeMemory, KMemoryPermission::Mask,
+            KMemoryState::FlagCanCodeMemory, KMemoryPermission::All,
             KMemoryPermission::UserReadWrite, KMemoryAttribute::All, KMemoryAttribute::None)};
         result.IsError()) {
         return result;
@@ -1058,9 +1122,8 @@ ResultCode KPageTable::InitializeMemoryLayout(VAddr start, VAddr end) {
 
 bool KPageTable::IsRegionMapped(VAddr address, u64 size) {
     return CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free,
-                            KMemoryPermission::Mask, KMemoryPermission::None,
-                            KMemoryAttribute::Mask, KMemoryAttribute::None,
-                            KMemoryAttribute::IpcAndDeviceMapped)
+                            KMemoryPermission::All, KMemoryPermission::None, KMemoryAttribute::Mask,
+                            KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)
         .IsError();
 }
 
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index db08ea8ce..564410dca 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -50,8 +50,8 @@ public:
     ResultCode SetMemoryPermission(VAddr addr, std::size_t size, Svc::MemoryPermission perm);
     ResultCode SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryAttribute mask,
                                   KMemoryAttribute value);
-    ResultCode SetHeapCapacity(std::size_t new_heap_capacity);
-    ResultVal<VAddr> SetHeapSize(std::size_t size);
+    ResultCode SetMaxHeapSize(std::size_t size);
+    ResultCode SetHeapSize(VAddr* out, std::size_t size);
     ResultVal<VAddr> AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
                                           bool is_map_only, VAddr region_start,
                                           std::size_t region_num_pages, KMemoryState state,
@@ -183,14 +183,15 @@ public:
     constexpr VAddr GetAliasCodeRegionSize() const {
         return alias_code_region_end - alias_code_region_start;
     }
+    size_t GetNormalMemorySize() {
+        std::lock_guard lk(page_table_lock);
+        return GetHeapSize() + mapped_physical_memory_size;
+    }
     constexpr std::size_t GetAddressSpaceWidth() const {
         return address_space_width;
     }
-    constexpr std::size_t GetHeapSize() {
-        return current_heap_addr - heap_region_start;
-    }
-    constexpr std::size_t GetTotalHeapSize() {
-        return GetHeapSize() + physical_memory_usage;
+    constexpr std::size_t GetHeapSize() const {
+        return current_heap_end - heap_region_start;
     }
     constexpr bool IsInsideAddressSpace(VAddr address, std::size_t size) const {
         return address_space_start <= address && address + size - 1 <= address_space_end - 1;
@@ -270,10 +271,8 @@ private:
     VAddr code_region_end{};
     VAddr alias_code_region_start{};
     VAddr alias_code_region_end{};
-    VAddr current_heap_addr{};
 
-    std::size_t heap_capacity{};
-    std::size_t physical_memory_usage{};
+    std::size_t mapped_physical_memory_size{};
     std::size_t max_heap_size{};
     std::size_t max_physical_memory_size{};
     std::size_t address_space_width{};
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index aee313995..bf98a51e2 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -172,7 +172,7 @@ void KProcess::DecrementThreadCount() {
 
 u64 KProcess::GetTotalPhysicalMemoryAvailable() const {
     const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) +
-                       page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
+                       page_table->GetNormalMemorySize() + GetSystemResourceSize() + image_size +
                        main_thread_stack_size};
     if (const auto pool_size = kernel.MemoryManager().GetSize(KMemoryManager::Pool::Application);
         capacity != pool_size) {
@@ -189,7 +189,7 @@ u64 KProcess::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
 }
 
 u64 KProcess::GetTotalPhysicalMemoryUsed() const {
-    return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
+    return image_size + main_thread_stack_size + page_table->GetNormalMemorySize() +
            GetSystemResourceSize();
 }
 
@@ -220,30 +220,28 @@ bool KProcess::ReleaseUserException(KThread* thread) {
     }
 }
 
-void KProcess::PinCurrentThread() {
+void KProcess::PinCurrentThread(s32 core_id) {
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // Get the current thread.
-    const s32 core_id = GetCurrentCoreId(kernel);
-    KThread* cur_thread = GetCurrentThreadPointer(kernel);
+    KThread* cur_thread = kernel.Scheduler(static_cast<std::size_t>(core_id)).GetCurrentThread();
 
     // If the thread isn't terminated, pin it.
     if (!cur_thread->IsTerminationRequested()) {
         // Pin it.
         PinThread(core_id, cur_thread);
-        cur_thread->Pin();
+        cur_thread->Pin(core_id);
 
         // An update is needed.
         KScheduler::SetSchedulerUpdateNeeded(kernel);
     }
 }
 
-void KProcess::UnpinCurrentThread() {
+void KProcess::UnpinCurrentThread(s32 core_id) {
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // Get the current thread.
-    const s32 core_id = GetCurrentCoreId(kernel);
-    KThread* cur_thread = GetCurrentThreadPointer(kernel);
+    KThread* cur_thread = kernel.Scheduler(static_cast<std::size_t>(core_id)).GetCurrentThread();
 
     // Unpin it.
     cur_thread->Unpin();
@@ -410,8 +408,8 @@ void KProcess::Run(s32 main_thread_priority, u64 stack_size) {
     resource_limit->Reserve(LimitableResource::Threads, 1);
     resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
 
-    const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size};
-    ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError());
+    const std::size_t heap_capacity{memory_usage_capacity - (main_thread_stack_size + image_size)};
+    ASSERT(!page_table->SetMaxHeapSize(heap_capacity).IsError());
 
     ChangeStatus(ProcessStatus::Running);
 
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index cb93c7e24..e7c8b5838 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -345,8 +345,8 @@ public:
 
     bool IsSignaled() const override;
 
-    void PinCurrentThread();
-    void UnpinCurrentThread();
+    void PinCurrentThread(s32 core_id);
+    void UnpinCurrentThread(s32 core_id);
     void UnpinThread(KThread* thread);
 
     KLightLock& GetStateLock() {
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
index 277201de4..31cec990e 100644
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -15,6 +15,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
+#include "core/hle/kernel/k_interrupt_manager.h"
 #include "core/hle/kernel/k_process.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
@@ -53,6 +54,13 @@ void KScheduler::RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedul
         }
         cores_pending_reschedule &= ~(1ULL << core);
     }
+
+    for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; ++core_id) {
+        if (kernel.PhysicalCore(core_id).IsInterrupted()) {
+            KInterruptManager::HandleInterrupt(kernel, static_cast<s32>(core_id));
+        }
+    }
+
     if (must_context_switch) {
         auto core_scheduler = kernel.CurrentScheduler();
         kernel.ExitSVCProfile();
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index b8c993748..71e029a3f 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <atomic>
 #include <cinttypes>
 #include <optional>
 #include <vector>
@@ -33,6 +34,7 @@
 #include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/result.h"
+#include "core/memory.h"
 
 #ifdef ARCHITECTURE_x86_64
 #include "core/arm/dynarmic/arm_dynarmic_32.h"
@@ -63,6 +65,13 @@ namespace Kernel {
 
 namespace {
 
+struct ThreadLocalRegion {
+    static constexpr std::size_t MessageBufferSize = 0x100;
+    std::array<u32, MessageBufferSize / sizeof(u32)> message_buffer;
+    std::atomic_uint16_t disable_count;
+    std::atomic_uint16_t interrupt_flag;
+};
+
 class ThreadQueueImplForKThreadSleep final : public KThreadQueueWithoutEndWait {
 public:
     explicit ThreadQueueImplForKThreadSleep(KernelCore& kernel_)
@@ -346,7 +355,7 @@ void KThread::StartTermination() {
     if (parent != nullptr) {
         parent->ReleaseUserException(this);
         if (parent->GetPinnedThread(GetCurrentCoreId(kernel)) == this) {
-            parent->UnpinCurrentThread();
+            parent->UnpinCurrentThread(core_id);
         }
     }
 
@@ -372,7 +381,7 @@ void KThread::StartTermination() {
     this->Close();
 }
 
-void KThread::Pin() {
+void KThread::Pin(s32 current_core) {
     ASSERT(kernel.GlobalSchedulerContext().IsLocked());
 
     // Set ourselves as pinned.
@@ -389,7 +398,6 @@ void KThread::Pin() {
 
         // Bind ourselves to this core.
         const s32 active_core = GetActiveCore();
-        const s32 current_core = GetCurrentCoreId(kernel);
 
         SetActiveCore(current_core);
         physical_ideal_core_id = current_core;
@@ -482,6 +490,36 @@ void KThread::Unpin() {
     }
 }
 
+u16 KThread::GetUserDisableCount() const {
+    if (!IsUserThread()) {
+        // We only emulate TLS for user threads
+        return {};
+    }
+
+    auto& memory = kernel.System().Memory();
+    return memory.Read16(tls_address + offsetof(ThreadLocalRegion, disable_count));
+}
+
+void KThread::SetInterruptFlag() {
+    if (!IsUserThread()) {
+        // We only emulate TLS for user threads
+        return;
+    }
+
+    auto& memory = kernel.System().Memory();
+    memory.Write16(tls_address + offsetof(ThreadLocalRegion, interrupt_flag), 1);
+}
+
+void KThread::ClearInterruptFlag() {
+    if (!IsUserThread()) {
+        // We only emulate TLS for user threads
+        return;
+    }
+
+    auto& memory = kernel.System().Memory();
+    memory.Write16(tls_address + offsetof(ThreadLocalRegion, interrupt_flag), 0);
+}
+
 ResultCode KThread::GetCoreMask(s32* out_ideal_core, u64* out_affinity_mask) {
     KScopedSchedulerLock sl{kernel};
 
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index c8a08bd71..83dfde69b 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -307,6 +307,10 @@ public:
         return parent != nullptr;
     }
 
+    u16 GetUserDisableCount() const;
+    void SetInterruptFlag();
+    void ClearInterruptFlag();
+
     [[nodiscard]] KThread* GetLockOwner() const {
         return lock_owner;
     }
@@ -490,7 +494,7 @@ public:
         this->GetStackParameters().disable_count--;
     }
 
-    void Pin();
+    void Pin(s32 current_core);
 
     void Unpin();
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 68cb47211..250ef9042 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -135,24 +135,15 @@ enum class ResourceLimitValueType {
 } // Anonymous namespace
 
 /// Set the process heap to a given Size. It can both extend and shrink the heap.
-static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
-    LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
+static ResultCode SetHeapSize(Core::System& system, VAddr* out_address, u64 size) {
+    LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", size);
 
-    // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
-    if ((heap_size % 0x200000) != 0) {
-        LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}",
-                  heap_size);
-        return ResultInvalidSize;
-    }
-
-    if (heap_size >= 0x200000000) {
-        LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size);
-        return ResultInvalidSize;
-    }
-
-    auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
+    // Validate size.
+    R_UNLESS(Common::IsAligned(size, HeapSizeAlignment), ResultInvalidSize);
+    R_UNLESS(size < MainMemorySizeMax, ResultInvalidSize);
 
-    CASCADE_RESULT(*heap_addr, page_table.SetHeapSize(heap_size));
+    // Set the heap size.
+    R_TRY(system.Kernel().CurrentProcess()->PageTable().SetHeapSize(out_address, size));
 
     return ResultSuccess;
 }
@@ -2036,6 +2027,25 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, Svc::Sign
                                                                   count);
 }
 
+static void SynchronizePreemptionState(Core::System& system) {
+    auto& kernel = system.Kernel();
+
+    // Lock the scheduler.
+    KScopedSchedulerLock sl{kernel};
+
+    // If the current thread is pinned, unpin it.
+    KProcess* cur_process = system.Kernel().CurrentProcess();
+    const auto core_id = GetCurrentCoreId(kernel);
+
+    if (cur_process->GetPinnedThread(core_id) == GetCurrentThreadPointer(kernel)) {
+        // Clear the current thread's interrupt flag.
+        GetCurrentThread(kernel).ClearInterruptFlag();
+
+        // Unpin the current thread.
+        cur_process->UnpinCurrentThread(core_id);
+    }
+}
+
 static ResultCode SignalToAddress32(Core::System& system, u32 address, Svc::SignalType signal_type,
                                     s32 value, s32 count) {
     return SignalToAddress(system, address, signal_type, value, count);
@@ -2806,7 +2816,7 @@ static const FunctionDef SVC_Table_64[] = {
     {0x33, SvcWrap64<GetThreadContext>, "GetThreadContext"},
     {0x34, SvcWrap64<WaitForAddress>, "WaitForAddress"},
     {0x35, SvcWrap64<SignalToAddress>, "SignalToAddress"},
-    {0x36, nullptr, "SynchronizePreemptionState"},
+    {0x36, SvcWrap64<SynchronizePreemptionState>, "SynchronizePreemptionState"},
     {0x37, nullptr, "Unknown"},
     {0x38, nullptr, "Unknown"},
     {0x39, nullptr, "Unknown"},
diff --git a/src/core/hle/kernel/svc_common.h b/src/core/hle/kernel/svc_common.h
index 60ea2c405..25de6e437 100644
--- a/src/core/hle/kernel/svc_common.h
+++ b/src/core/hle/kernel/svc_common.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "common/literals.h"
 
 namespace Kernel {
 using Handle = u32;
@@ -12,9 +13,13 @@ using Handle = u32;
 
 namespace Kernel::Svc {
 
+using namespace Common::Literals;
+
 constexpr s32 ArgumentHandleCountMax = 0x40;
 constexpr u32 HandleWaitMask{1u << 30};
 
+constexpr inline std::size_t HeapSizeAlignment = 2_MiB;
+
 constexpr inline Handle InvalidHandle = Handle(0);
 
 enum PseudoHandle : Handle {
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
index 6f98d0998..7434a1f92 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -126,6 +126,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
     }
 }
 
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32) {
+    switch (attr) {
+    case IR::Attribute::PrimitiveId:
+        ctx.Add("MOV.S {}.x,primitive.id;", inst);
+        break;
+    case IR::Attribute::InstanceId:
+        ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
+        break;
+    case IR::Attribute::VertexId:
+        ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
+        break;
+    default:
+        throw NotImplementedException("Get U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
                       [[maybe_unused]] ScalarU32 vertex) {
     const u32 element{static_cast<u32>(attr) % 4};
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 1f343bff5..b48007856 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -50,6 +50,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
 void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
 void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
 void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
 void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
index 0f2668d9e..e0ead7a53 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -7,6 +7,7 @@
 #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
 #include "shader_recompiler/backend/glsl/glsl_emit_context.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::GLSL {
 namespace {
@@ -30,8 +31,9 @@ void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value)
     inst.DestructiveAddUsage(1);
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
     const auto input{ctx.var_alloc.Consume(value)};
+    const auto suffix{ctx.profile.has_gl_bool_ref_bug ? "?true:false" : ""};
     if (ret != input) {
-        ctx.Add("{}={};", ret, input);
+        ctx.Add("{}={}{};", ret, input, suffix);
     }
 }
 
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 6477bd192..0c1fbc7b1 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -102,39 +102,46 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const
 
 void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    const IR::Value& offset) {
-    GetCbuf8(ctx, inst, binding, offset, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf8(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    const IR::Value& offset) {
-    GetCbuf8(ctx, inst, binding, offset, "ftoi");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
+    GetCbuf8(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
-    GetCbuf16(ctx, inst, binding, offset, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf16(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
-    GetCbuf16(ctx, inst, binding, offset, "ftoi");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
+    GetCbuf16(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
-    GetCbuf(ctx, ret, binding, offset, 32, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf(ctx, ret, binding, offset, 32, cast);
 }
 
 void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
-    GetCbuf(ctx, ret, binding, offset, 32);
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""};
+    GetCbuf(ctx, ret, binding, offset, 32, cast);
 }
 
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                       const IR::Value& offset) {
     const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
     if (offset.IsImmediate()) {
         static constexpr u32 cbuf_size{0x10000};
         const u32 u32_offset{offset.U32()};
@@ -145,26 +152,26 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
             return;
         }
         if (u32_offset % 2 == 0) {
-            ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
+            ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16,
                          OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
         } else {
-            ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
-                         OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
-                         OffsetSwizzle(u32_offset + 4));
+            ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
+                         u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf,
+                         (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4));
         }
         return;
     }
     const auto offset_var{ctx.var_alloc.Consume(offset)};
     if (!ctx.profile.has_gl_component_indexing_bug) {
-        ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
-                     inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
+        ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
+                     cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
         return;
     }
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
     const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
     for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
-        ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
-                swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
+        ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
+                swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
                 "xyzw"[(swizzle + 1) % 4]);
     }
 }
@@ -221,6 +228,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
     }
 }
 
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view) {
+    switch (attr) {
+    case IR::Attribute::PrimitiveId:
+        ctx.AddU32("{}=uint(gl_PrimitiveID);", inst);
+        break;
+    case IR::Attribute::InstanceId:
+        ctx.AddU32("{}=uint(gl_InstanceID);", inst);
+        break;
+    case IR::Attribute::VertexId:
+        ctx.AddU32("{}=uint(gl_VertexID);", inst);
+        break;
+    default:
+        throw NotImplementedException("Get U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
                       [[maybe_unused]] std::string_view vertex) {
     if (IR::IsGeneric(attr)) {
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
index b765a251b..474189d87 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
@@ -125,11 +125,11 @@ void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i
 }
 
 void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddF32("{}=-({});", inst, value);
+    ctx.AddF32("{}=0.f-({});", inst, value);
 }
 
 void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddF64("{}=-({});", inst, value);
+    ctx.AddF64("{}=double(0.)-({});", inst, value);
 }
 
 void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index f86502e4c..6cabbc717 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -60,6 +60,8 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
                       const IR::Value& offset);
 void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
                       std::string_view vertex);
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
+                         std::string_view vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
                       std::string_view vertex);
 void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
index 44060df33..b0d85be99 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
@@ -87,11 +87,11 @@ void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
 }
 
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddU32("{}=uint(-({}));", inst, value);
+    ctx.AddU32("{}=uint(int(0)-int({}));", inst, value);
 }
 
 void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddU64("{}=-({});", inst, value);
+    ctx.AddU64("{}=uint64_t(int64_t(0)-int64_t({}));", inst, value);
 }
 
 void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
index b8ddafe48..fcf620b79 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -90,7 +90,9 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
     if (phi_reg == val_reg) {
         return;
     }
-    ctx.Add("{}={};", phi_reg, val_reg);
+    const bool needs_workaround{ctx.profile.has_gl_bool_ref_bug && phi_type == IR::Type::U1};
+    const auto suffix{needs_workaround ? "?true:false" : ""};
+    ctx.Add("{}={}{};", phi_reg, val_reg, suffix);
 }
 
 void EmitPrologue(EmitContext& ctx) {
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index bc9d2a904..bb7f1a0fd 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -428,9 +428,10 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
         return;
     }
     for (const auto& desc : info.constant_buffer_descriptors) {
-        header += fmt::format(
-            "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
-            bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
+        const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"};
+        header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};",
+                              bindings.uniform_buffer, stage_name, desc.index, cbuf_type,
+                              stage_name, desc.index, 4 * 1024);
         bindings.uniform_buffer += desc.count;
     }
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 6ce7ed12a..50918317f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -30,11 +30,20 @@ struct FuncTraits<ReturnType_ (*)(Args...)> {
     using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
 };
 
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4702) // Ignore unreachable code warning
+#endif
+
 template <auto func, typename... Args>
 void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
     inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
 }
 
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
 template <typename ArgType>
 ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
     if constexpr (std::is_same_v<ArgType, Id>) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 14f470812..8ea730c80 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -355,6 +355,31 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
     }
 }
 
+Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
+    switch (attr) {
+    case IR::Attribute::PrimitiveId:
+        return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
+    case IR::Attribute::InstanceId:
+        if (ctx.profile.support_vertex_instance_id) {
+            return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
+        } else {
+            const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)};
+            const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)};
+            return ctx.OpISub(ctx.U32[1], index, base);
+        }
+    case IR::Attribute::VertexId:
+        if (ctx.profile.support_vertex_instance_id) {
+            return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
+        } else {
+            const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
+            const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
+            return ctx.OpISub(ctx.U32[1], index, base);
+        }
+    default:
+        throw NotImplementedException("Read U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
     const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
     if (!output) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 6cd22dd3e..887112deb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -53,6 +53,7 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
+Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
 Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
 void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index b4df73e8a..db16429d4 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -31,6 +31,8 @@ public:
 
     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
 
+    virtual void Dump(u64 hash) = 0;
+
     [[nodiscard]] const ProgramHeader& SPH() const noexcept {
         return sph;
     }
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 6929919df..b94ce7406 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -40,6 +40,7 @@ OPCODE(GetCbufU32,                                          U32,            U32,
 OPCODE(GetCbufF32,                                          F32,            U32,            U32,                                                            )
 OPCODE(GetCbufU32x2,                                        U32x2,          U32,            U32,                                                            )
 OPCODE(GetAttribute,                                        F32,            Attribute,      U32,                                                            )
+OPCODE(GetAttributeU32,                                     U32,            Attribute,      U32,                                                            )
 OPCODE(SetAttribute,                                        Void,           Attribute,      F32,            U32,                                            )
 OPCODE(GetAttributeIndexed,                                 F32,            U32,            U32,                                                            )
 OPCODE(SetAttributeIndexed,                                 Void,           U32,            F32,            U32,                                            )
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 1e476d83d..a78c469be 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -389,6 +389,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
         info.uses_demote_to_helper_invocation = true;
         break;
     case IR::Opcode::GetAttribute:
+    case IR::Opcode::GetAttributeU32:
         info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
         break;
     case IR::Opcode::SetAttribute:
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index d089fdd12..c134a12bc 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -505,6 +505,29 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
             return;
         }
     }
+    if constexpr (op == IR::Opcode::BitCastU32F32) {
+        // Workaround for new NVIDIA driver bug, where:
+        // uint attr = ftou(itof(gl_InstanceID));
+        // always returned 0.
+        // We can instead manually optimize this and work around the driver bug:
+        // uint attr = uint(gl_InstanceID);
+        if (arg_inst->GetOpcode() == IR::Opcode::GetAttribute) {
+            const IR::Attribute attr{arg_inst->Arg(0).Attribute()};
+            switch (attr) {
+            case IR::Attribute::PrimitiveId:
+            case IR::Attribute::InstanceId:
+            case IR::Attribute::VertexId:
+                break;
+            default:
+                return;
+            }
+            // Replace the bitcasts with an integer attribute get
+            inst.ReplaceOpcode(IR::Opcode::GetAttributeU32);
+            inst.SetArg(0, arg_inst->Arg(0));
+            inst.SetArg(1, arg_inst->Arg(1));
+            return;
+        }
+    }
 }
 
 void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index f0c3b3b17..dc4c806ff 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -65,6 +65,10 @@ struct Profile {
     bool has_gl_component_indexing_bug{};
     /// The precise type qualifier is broken in the fragment stage of some drivers
     bool has_gl_precise_bug{};
+    /// Some drivers do not properly support floatBitsToUint when used on cbufs
+    bool has_gl_cbuf_ftou_bug{};
+    /// Some drivers poorly optimize boolean variable references
+    bool has_gl_bool_ref_bug{};
     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
     bool ignore_nan_fp_comparisons{};
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index f524f8bae..705765c99 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -311,6 +311,12 @@ struct GPU::Impl {
         cpu_context->MakeCurrent();
     }
 
+    void NotifyShutdown() {
+        std::unique_lock lk{sync_mutex};
+        shutting_down.store(true, std::memory_order::relaxed);
+        sync_cv.notify_all();
+    }
+
     /// Obtain the CPU Context
     void ObtainContext() {
         cpu_context->MakeCurrent();
@@ -858,6 +864,10 @@ void GPU::Start() {
     impl->Start();
 }
 
+void GPU::NotifyShutdown() {
+    impl->NotifyShutdown();
+}
+
 void GPU::ObtainContext() {
     impl->ObtainContext();
 }
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 500411176..3188b83ed 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -232,6 +232,9 @@ public:
     /// core timing events.
     void Start();
 
+    /// Performs any additional necessary steps to shutdown GPU emulation.
+    void NotifyShutdown();
+
     /// Obtain the CPU Context
     void ObtainContext();
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 0764ea6e0..e62912a22 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -182,17 +182,13 @@ Device::Device() {
         shader_backend = Settings::ShaderBackend::GLSL;
     }
 
-    if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia &&
-        !Settings::values.renderer_debug) {
+    if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
         const std::string_view driver_version = version.substr(13);
         const int version_major =
             std::atoi(driver_version.substr(0, driver_version.find(".")).data());
-
         if (version_major >= 495) {
-            LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems "
-                                       "with yuzu. Forcing GLASM as a mitigation.");
-            shader_backend = Settings::ShaderBackend::GLASM;
-            use_assembly_shaders = true;
+            has_cbuf_ftou_bug = true;
+            has_bool_ref_bug = true;
         }
     }
 
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de9e41659..95c2e8d38 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -152,6 +152,14 @@ public:
         return need_fastmath_off;
     }
 
+    bool HasCbufFtouBug() const {
+        return has_cbuf_ftou_bug;
+    }
+
+    bool HasBoolRefBug() const {
+        return has_bool_ref_bug;
+    }
+
     Settings::ShaderBackend GetShaderBackend() const {
         return shader_backend;
     }
@@ -200,6 +208,8 @@ private:
     bool has_sparse_texture_2{};
     bool warp_size_potentially_larger_than_guest{};
     bool need_fastmath_off{};
+    bool has_cbuf_ftou_bug{};
+    bool has_bool_ref_bug{};
 
     std::string vendor_name;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 29c6e1a5f..f71e01a34 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .has_broken_fp16_float_controls = false,
           .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
           .has_gl_precise_bug = device.HasPreciseBug(),
+          .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
+          .has_gl_bool_ref_bug = device.HasBoolRefBug(),
           .ignore_nan_fp_comparisons = true,
           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
       },
@@ -423,6 +425,11 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
 
         const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+
+        if (Settings::values.dump_shaders) {
+            env.Dump(key.unique_hashes[index]);
+        }
+
         if (!uses_vertex_a || index != 1) {
             // Normal path
             programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -509,8 +516,12 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
     LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
 
     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
-    auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
 
+    if (Settings::values.dump_shaders) {
+        env.Dump(key.Hash());
+    }
+
+    auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
     const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)};
     Shader::RuntimeInfo info;
     info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 2728353c8..a633b73e5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -517,6 +517,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
 
         const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+        if (Settings::values.dump_shaders) {
+            env.Dump(key.unique_hashes[index]);
+        }
         if (!uses_vertex_a || index != 1) {
             // Normal path
             programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -613,6 +616,12 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
 
     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+
+    // Dump it before error.
+    if (Settings::values.dump_shaders) {
+        env.Dump(key.Hash());
+    }
+
     auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
     const std::vector<u32> code{EmitSPIRV(profile, program)};
     device.SaveShader(code);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index c3050887c..0ba56ff1e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1344,7 +1344,6 @@ bool Image::ScaleUp(bool ignore) {
         return false;
     }
     has_scaled = true;
-    const auto& device = runtime->device;
     if (!scaled_image) {
         const bool is_2d = info.type == ImageType::e2D;
         const u32 scaled_width = resolution.ScaleUp(info.size.width);
@@ -1352,7 +1351,7 @@ bool Image::ScaleUp(bool ignore) {
         auto scaled_info = info;
         scaled_info.size.width = scaled_width;
         scaled_info.size.height = scaled_height;
-        scaled_image = MakeImage(device, scaled_info);
+        scaled_image = MakeImage(runtime->device, scaled_info);
         auto& allocator = runtime->memory_allocator;
         scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
         ignore = false;
@@ -1361,18 +1360,13 @@ bool Image::ScaleUp(bool ignore) {
     if (ignore) {
         return true;
     }
-
     if (aspect_mask == 0) {
         aspect_mask = ImageAspectMask(info.format);
     }
-    static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
-    const PixelFormat format = StorageFormat(info.format);
-    const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
-    const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
-    if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
-        BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
-    } else {
+    if (NeedsScaleHelper()) {
         return BlitScaleHelper(true);
+    } else {
+        BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
     }
     return true;
 }
@@ -1394,15 +1388,10 @@ bool Image::ScaleDown(bool ignore) {
     if (aspect_mask == 0) {
         aspect_mask = ImageAspectMask(info.format);
     }
-    static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
-    const PixelFormat format = StorageFormat(info.format);
-    const auto& device = runtime->device;
-    const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
-    const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
-    if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
-        BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
-    } else {
+    if (NeedsScaleHelper()) {
         return BlitScaleHelper(false);
+    } else {
+        BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
     }
     return true;
 }
@@ -1470,6 +1459,20 @@ bool Image::BlitScaleHelper(bool scale_up) {
     return true;
 }
 
+bool Image::NeedsScaleHelper() const {
+    const auto& device = runtime->device;
+    const bool needs_msaa_helper = info.num_samples > 1 && device.CantBlitMSAA();
+    if (needs_msaa_helper) {
+        return true;
+    }
+    static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
+    const PixelFormat format = StorageFormat(info.format);
+    const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
+    const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+    const bool needs_blit_helper = !device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT);
+    return needs_blit_helper;
+}
+
 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
                      ImageId image_id_, Image& image)
     : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 2f12be78b..c81130dd2 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -149,6 +149,8 @@ public:
 private:
     bool BlitScaleHelper(bool scale_up);
 
+    bool NeedsScaleHelper() const;
+
     VKScheduler* scheduler{};
     TextureCacheRuntime* runtime{};
 
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 05850afd0..7d3ae0de4 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <bit>
 #include <filesystem>
 #include <fstream>
 #include <memory>
@@ -14,6 +15,7 @@
 #include "common/common_types.h"
 #include "common/div_ceil.h"
 #include "common/fs/fs.h"
+#include "common/fs/path_util.h"
 #include "common/logging/log.h"
 #include "shader_recompiler/environment.h"
 #include "video_core/engines/kepler_compute.h"
@@ -57,6 +59,47 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
     }
 }
 
+static std::string_view StageToPrefix(Shader::Stage stage) {
+    switch (stage) {
+    case Shader::Stage::VertexB:
+        return "VB";
+    case Shader::Stage::TessellationControl:
+        return "TC";
+    case Shader::Stage::TessellationEval:
+        return "TE";
+    case Shader::Stage::Geometry:
+        return "GS";
+    case Shader::Stage::Fragment:
+        return "FS";
+    case Shader::Stage::Compute:
+        return "CS";
+    case Shader::Stage::VertexA:
+        return "VA";
+    default:
+        return "UK";
+    }
+}
+
+static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest,
+                     u32 initial_offset, Shader::Stage stage) {
+    const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
+    const auto base_dir{shader_dir / "shaders"};
+    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+        LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories");
+        return;
+    }
+    const auto prefix = StageToPrefix(stage);
+    const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)};
+    const size_t real_size = read_highest - read_lowest + initial_offset;
+    const size_t padding_needed = ((32 - (real_size % 32)) % 32);
+    std::fstream shader_file(name, std::ios::out | std::ios::binary);
+    const size_t jump_index = initial_offset / sizeof(u64);
+    shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size);
+    for (size_t i = 0; i < padding_needed; i++) {
+        shader_file.put(0);
+    }
+}
+
 GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
                                        u32 start_address_)
     : gpu_memory{&gpu_memory_}, program_base{program_base_} {
@@ -128,6 +171,10 @@ u64 GenericEnvironment::CalculateHash() const {
     return Common::CityHash64(data.get(), size);
 }
 
+void GenericEnvironment::Dump(u64 hash) {
+    DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage);
+}
+
 void GenericEnvironment::Serialize(std::ofstream& file) const {
     const u64 code_size{static_cast<u64>(CachedSize())};
     const u64 num_texture_types{static_cast<u64>(texture_types.size())};
@@ -207,6 +254,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
                                          u32 start_address_)
     : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
     gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
+    initial_offset = sizeof(sph);
     gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
     switch (program) {
     case Maxwell::ShaderProgram::VertexA:
@@ -323,14 +371,20 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
     if (stage == Shader::Stage::Compute) {
         file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
             .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
+        initial_offset = 0;
     } else {
         file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
+        initial_offset = sizeof(sph);
         if (stage == Shader::Stage::Geometry) {
             file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
         }
     }
 }
 
+void FileEnvironment::Dump(u64 [[maybe_unused]] hash) {
+    DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage);
+}
+
 u64 FileEnvironment::ReadInstruction(u32 address) {
     if (address < read_lowest || address > read_highest) {
         throw Shader::LogicError("Out of bounds address {}", address);
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 6640e53d0..aae762b27 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -57,6 +57,8 @@ public:
 
     [[nodiscard]] u64 CalculateHash() const;
 
+    void Dump(u64 hash) override;
+
     void Serialize(std::ofstream& file) const;
 
 protected:
@@ -82,6 +84,7 @@ protected:
 
     u32 cached_lowest = std::numeric_limits<u32>::max();
     u32 cached_highest = 0;
+    u32 initial_offset = 0;
 
     bool has_unbound_instructions = false;
 };
@@ -149,6 +152,8 @@ public:
 
     [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
 
+    void Dump(u64 hash) override;
+
 private:
     std::unique_ptr<u64[]> code;
     std::unordered_map<u32, Shader::TextureType> texture_types;
@@ -159,6 +164,7 @@ private:
     u32 texture_bound{};
     u32 read_lowest{};
     u32 read_highest{};
+    u32 initial_offset{};
 };
 
 void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 7bd31b211..d8e19cb2f 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -364,14 +364,14 @@ template <u32 GOB_EXTENT>
 
 [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
     const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
-    const u32 layer_stride = new_info.layer_stride;
-    const s32 new_size = layer_stride * new_info.resources.layers;
-    const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
+    const u64 layer_stride = new_info.layer_stride;
+    const u64 new_size = layer_stride * new_info.resources.layers;
+    const u64 diff = overlap.gpu_addr - gpu_addr;
     if (diff > new_size) {
         return std::nullopt;
     }
-    const s32 base_layer = diff / layer_stride;
-    const s32 mip_offset = diff % layer_stride;
+    const s32 base_layer = static_cast<s32>(diff / layer_stride);
+    const s32 mip_offset = static_cast<s32>(diff % layer_stride);
     const std::array offsets = CalculateMipLevelOffsets(new_info);
     const auto end = offsets.begin() + new_info.resources.levels;
     const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 9862b815b..3d78efddc 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -638,15 +638,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         }
     }
 
-    if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+    const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
+    if (ext_vertex_input_dynamic_state && is_intel_windows) {
         LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
         ext_vertex_input_dynamic_state = false;
     }
-    if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+    if (is_float16_supported && is_intel_windows) {
         // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
         LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
         is_float16_supported = false;
     }
+    if (is_intel_windows) {
+        LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
+        cant_blit_msaa = true;
+    }
 
     supports_d24_depth =
         IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 4c9d86aad..37d140ebd 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -350,6 +350,10 @@ public:
         return supports_d24_depth;
     }
 
+    bool CantBlitMSAA() const {
+        return cant_blit_msaa;
+    }
+
 private:
     /// Checks if the physical device is suitable.
     void CheckSuitability(bool requires_swapchain) const;
@@ -443,6 +447,7 @@ private:
     bool has_renderdoc{};                   ///< Has RenderDoc attached
     bool has_nsight_graphics{};             ///< Has Nsight Graphics attached
     bool supports_d24_depth{};              ///< Supports D24 depth buffers.
+    bool cant_blit_msaa{};                  ///< Does not support MSAA<->MSAA blitting.
 
     // Telemetry parameters
     std::string vendor_name;                       ///< Device's driver name.
diff --git a/src/yuzu/applets/qt_controller.cpp b/src/yuzu/applets/qt_controller.cpp
index c6222b571..4239c17f5 100644
--- a/src/yuzu/applets/qt_controller.cpp
+++ b/src/yuzu/applets/qt_controller.cpp
@@ -33,7 +33,7 @@ void UpdateController(Core::HID::EmulatedController* controller,
     }
     controller->SetNpadStyleIndex(controller_type);
     if (connected) {
-        controller->Connect();
+        controller->Connect(true);
     }
 }
 
@@ -400,36 +400,66 @@ void QtControllerSelectorDialog::SetSupportedControllers() {
 }
 
 void QtControllerSelectorDialog::SetEmulatedControllers(std::size_t player_index) {
+    const auto npad_style_set = system.HIDCore().GetSupportedStyleTag();
     auto& pairs = index_controller_type_pairs[player_index];
 
     pairs.clear();
     emulated_controllers[player_index]->clear();
 
-    pairs.emplace_back(emulated_controllers[player_index]->count(),
-                       Core::HID::NpadStyleIndex::ProController);
-    emulated_controllers[player_index]->addItem(tr("Pro Controller"));
+    const auto add_item = [&](Core::HID::NpadStyleIndex controller_type,
+                              const QString& controller_name) {
+        pairs.emplace_back(emulated_controllers[player_index]->count(), controller_type);
+        emulated_controllers[player_index]->addItem(controller_name);
+    };
 
-    pairs.emplace_back(emulated_controllers[player_index]->count(),
-                       Core::HID::NpadStyleIndex::JoyconDual);
-    emulated_controllers[player_index]->addItem(tr("Dual Joycons"));
+    if (npad_style_set.fullkey == 1) {
+        add_item(Core::HID::NpadStyleIndex::ProController, tr("Pro Controller"));
+    }
 
-    pairs.emplace_back(emulated_controllers[player_index]->count(),
-                       Core::HID::NpadStyleIndex::JoyconLeft);
-    emulated_controllers[player_index]->addItem(tr("Left Joycon"));
+    if (npad_style_set.joycon_dual == 1) {
+        add_item(Core::HID::NpadStyleIndex::JoyconDual, tr("Dual Joycons"));
+    }
 
-    pairs.emplace_back(emulated_controllers[player_index]->count(),
-                       Core::HID::NpadStyleIndex::JoyconRight);
-    emulated_controllers[player_index]->addItem(tr("Right Joycon"));
+    if (npad_style_set.joycon_left == 1) {
+        add_item(Core::HID::NpadStyleIndex::JoyconLeft, tr("Left Joycon"));
+    }
 
-    if (player_index == 0) {
-        pairs.emplace_back(emulated_controllers[player_index]->count(),
-                           Core::HID::NpadStyleIndex::Handheld);
-        emulated_controllers[player_index]->addItem(tr("Handheld"));
+    if (npad_style_set.joycon_right == 1) {
+        add_item(Core::HID::NpadStyleIndex::JoyconRight, tr("Right Joycon"));
     }
 
-    pairs.emplace_back(emulated_controllers[player_index]->count(),
-                       Core::HID::NpadStyleIndex::GameCube);
-    emulated_controllers[player_index]->addItem(tr("GameCube Controller"));
+    if (player_index == 0 && npad_style_set.handheld == 1) {
+        add_item(Core::HID::NpadStyleIndex::Handheld, tr("Handheld"));
+    }
+
+    if (npad_style_set.gamecube == 1) {
+        add_item(Core::HID::NpadStyleIndex::GameCube, tr("GameCube Controller"));
+    }
+
+    // Disable all unsupported controllers
+    if (!Settings::values.enable_all_controllers) {
+        return;
+    }
+
+    if (npad_style_set.palma == 1) {
+        add_item(Core::HID::NpadStyleIndex::Pokeball, tr("Poke Ball Plus"));
+    }
+
+    if (npad_style_set.lark == 1) {
+        add_item(Core::HID::NpadStyleIndex::NES, tr("NES Controller"));
+    }
+
+    if (npad_style_set.lucia == 1) {
+        add_item(Core::HID::NpadStyleIndex::SNES, tr("SNES Controller"));
+    }
+
+    if (npad_style_set.lagoon == 1) {
+        add_item(Core::HID::NpadStyleIndex::N64, tr("N64 Controller"));
+    }
+
+    if (npad_style_set.lager == 1) {
+        add_item(Core::HID::NpadStyleIndex::SegaGenesis, tr("Sega Genesis"));
+    }
 }
 
 Core::HID::NpadStyleIndex QtControllerSelectorDialog::GetControllerTypeFromIndex(
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index 633fc295b..c1cf4050c 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -51,6 +51,8 @@ void ConfigureDebug::SetConfiguration() {
     ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue());
     ui->enable_nsight_aftermath->setEnabled(runtime_lock);
     ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue());
+    ui->dump_shaders->setEnabled(runtime_lock);
+    ui->dump_shaders->setChecked(Settings::values.dump_shaders.GetValue());
     ui->disable_macro_jit->setEnabled(runtime_lock);
     ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue());
     ui->disable_loop_safety_checks->setEnabled(runtime_lock);
@@ -73,6 +75,7 @@ void ConfigureDebug::ApplyConfiguration() {
     Settings::values.renderer_shader_feedback = ui->enable_shader_feedback->isChecked();
     Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
     Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();
+    Settings::values.dump_shaders = ui->dump_shaders->isChecked();
     Settings::values.disable_shader_loop_safety_checks =
         ui->disable_loop_safety_checks->isChecked();
     Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index 0f3b51c8d..4dd870855 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -105,6 +105,19 @@
         </property>
        </widget>
       </item>
+      <item row="2" column="1">
+       <widget class="QCheckBox" name="dump_shaders">
+        <property name="enabled">
+         <bool>true</bool>
+        </property>
+        <property name="toolTip">
+         <string>When checked, it will dump all the original assembler shaders from the disk shader cache or game as found</string>
+        </property>
+        <property name="text">
+         <string>Dump Game Shaders</string>
+        </property>
+       </widget>
+      </item>
       <item row="0" column="1">
        <widget class="QCheckBox" name="disable_macro_jit">
         <property name="enabled">
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 8a8be8e40..8c6249fc2 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -599,11 +599,11 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
                     if (is_connected) {
                         if (type == Core::HID::NpadStyleIndex::Handheld) {
                             emulated_controller_p1->Disconnect();
-                            emulated_controller_handheld->Connect();
+                            emulated_controller_handheld->Connect(true);
                             emulated_controller = emulated_controller_handheld;
                         } else {
                             emulated_controller_handheld->Disconnect();
-                            emulated_controller_p1->Connect();
+                            emulated_controller_p1->Connect(true);
                             emulated_controller = emulated_controller_p1;
                         }
                     }
@@ -718,7 +718,7 @@ void ConfigureInputPlayer::LoadConfiguration() {
 void ConfigureInputPlayer::ConnectPlayer(bool connected) {
     ui->groupConnectedController->setChecked(connected);
     if (connected) {
-        emulated_controller->Connect();
+        emulated_controller->Connect(true);
     } else {
         emulated_controller->Disconnect();
     }
@@ -907,78 +907,63 @@ void ConfigureInputPlayer::UpdateUI() {
 }
 
 void ConfigureInputPlayer::SetConnectableControllers() {
-    Core::HID::NpadStyleTag npad_style_set = hid_core.GetSupportedStyleTag();
+    const auto npad_style_set = hid_core.GetSupportedStyleTag();
     index_controller_type_pairs.clear();
     ui->comboControllerType->clear();
 
+    const auto add_item = [&](Core::HID::NpadStyleIndex controller_type,
+                              const QString& controller_name) {
+        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(), controller_type);
+        ui->comboControllerType->addItem(controller_name);
+    };
+
     if (npad_style_set.fullkey == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::ProController);
-        ui->comboControllerType->addItem(tr("Pro Controller"));
+        add_item(Core::HID::NpadStyleIndex::ProController, tr("Pro Controller"));
     }
 
     if (npad_style_set.joycon_dual == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::JoyconDual);
-        ui->comboControllerType->addItem(tr("Dual Joycons"));
+        add_item(Core::HID::NpadStyleIndex::JoyconDual, tr("Dual Joycons"));
     }
 
     if (npad_style_set.joycon_left == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::JoyconLeft);
-        ui->comboControllerType->addItem(tr("Left Joycon"));
+        add_item(Core::HID::NpadStyleIndex::JoyconLeft, tr("Left Joycon"));
     }
 
     if (npad_style_set.joycon_right == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::JoyconRight);
-        ui->comboControllerType->addItem(tr("Right Joycon"));
+        add_item(Core::HID::NpadStyleIndex::JoyconRight, tr("Right Joycon"));
     }
 
     if (player_index == 0 && npad_style_set.handheld == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::Handheld);
-        ui->comboControllerType->addItem(tr("Handheld"));
+        add_item(Core::HID::NpadStyleIndex::Handheld, tr("Handheld"));
     }
 
     if (npad_style_set.gamecube == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::GameCube);
-        ui->comboControllerType->addItem(tr("GameCube Controller"));
+        add_item(Core::HID::NpadStyleIndex::GameCube, tr("GameCube Controller"));
     }
 
     // Disable all unsupported controllers
     if (!Settings::values.enable_all_controllers) {
         return;
     }
+
     if (npad_style_set.palma == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::Pokeball);
-        ui->comboControllerType->addItem(tr("Poke Ball Plus"));
+        add_item(Core::HID::NpadStyleIndex::Pokeball, tr("Poke Ball Plus"));
     }
 
     if (npad_style_set.lark == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::NES);
-        ui->comboControllerType->addItem(tr("NES Controller"));
+        add_item(Core::HID::NpadStyleIndex::NES, tr("NES Controller"));
     }
 
     if (npad_style_set.lucia == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::SNES);
-        ui->comboControllerType->addItem(tr("SNES Controller"));
+        add_item(Core::HID::NpadStyleIndex::SNES, tr("SNES Controller"));
     }
 
     if (npad_style_set.lagoon == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::N64);
-        ui->comboControllerType->addItem(tr("N64 Controller"));
+        add_item(Core::HID::NpadStyleIndex::N64, tr("N64 Controller"));
     }
 
     if (npad_style_set.lager == 1) {
-        index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
-                                                 Core::HID::NpadStyleIndex::SegaGenesis);
-        ui->comboControllerType->addItem(tr("Sega Genesis"));
+        add_item(Core::HID::NpadStyleIndex::SegaGenesis, tr("Sega Genesis"));
     }
 }
 
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 1e02d715b..53f11a9ac 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -1547,6 +1547,8 @@ void GMainWindow::ShutdownGame() {
     emu_thread->wait();
     emu_thread = nullptr;
 
+    emulation_running = false;
+
     discord_rpc->Update();
 
     // The emulation is stopped, so closing the window or not does not matter anymore
@@ -1585,8 +1587,6 @@ void GMainWindow::ShutdownGame() {
     emu_frametime_label->setVisible(false);
     renderer_status_button->setEnabled(true);
 
-    emulation_running = false;
-
     game_path.clear();
 
     // When closing the game, destroy the GLWindow to clear the context after the game is closed