68 files changed, 971 insertions, 362 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 57922b51c..b18a2a2f5 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -53,6 +53,8 @@ add_library(common STATIC
     div_ceil.h
     dynamic_library.cpp
     dynamic_library.h
+    error.cpp
+    error.h
     fiber.cpp
     fiber.h
     fs/file.cpp
@@ -88,7 +90,6 @@ add_library(common STATIC
     microprofile.cpp
     microprofile.h
     microprofileui.h
-    misc.cpp
     nvidia_flags.cpp
     nvidia_flags.h
     page_table.cpp
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 53bd7da60..4c1e29de6 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -4,9 +4,8 @@
 
 #pragma once
 
-#include <algorithm>
 #include <array>
-#include <string>
+#include <iterator>
 
 #if !defined(ARCHITECTURE_x86_64)
 #include <cstdlib> // for exit
@@ -49,16 +48,6 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
 
 #endif // _MSC_VER ndef
 
-// Generic function to get last error message.
-// Call directly after the command or use the error num.
-// This function might change the error code.
-// Defined in misc.cpp.
-[[nodiscard]] std::string GetLastErrorMsg();
-
-// Like GetLastErrorMsg(), but passing an explicit error code.
-// Defined in misc.cpp.
-[[nodiscard]] std::string NativeErrorToString(int e);
-
 #define DECLARE_ENUM_FLAG_OPERATORS(type)                                                          \
     [[nodiscard]] constexpr type operator|(type a, type b) noexcept {                              \
         using T = std::underlying_type_t<type>;                                                    \
@@ -72,6 +61,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
         using T = std::underlying_type_t<type>;                                                    \
         return static_cast<type>(static_cast<T>(a) ^ static_cast<T>(b));                           \
     }                                                                                              \
+    [[nodiscard]] constexpr type operator<<(type a, type b) noexcept {                             \
+        using T = std::underlying_type_t<type>;                                                    \
+        return static_cast<type>(static_cast<T>(a) << static_cast<T>(b));                          \
+    }                                                                                              \
+    [[nodiscard]] constexpr type operator>>(type a, type b) noexcept {                             \
+        using T = std::underlying_type_t<type>;                                                    \
+        return static_cast<type>(static_cast<T>(a) >> static_cast<T>(b));                          \
+    }                                                                                              \
     constexpr type& operator|=(type& a, type b) noexcept {                                         \
         a = a | b;                                                                                 \
         return a;                                                                                  \
@@ -84,6 +81,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
         a = a ^ b;                                                                                 \
         return a;                                                                                  \
     }                                                                                              \
+    constexpr type& operator<<=(type& a, type b) noexcept {                                        \
+        a = a << b;                                                                                \
+        return a;                                                                                  \
+    }                                                                                              \
+    constexpr type& operator>>=(type& a, type b) noexcept {                                        \
+        a = a >> b;                                                                                \
+        return a;                                                                                  \
+    }                                                                                              \
     [[nodiscard]] constexpr type operator~(type key) noexcept {                                    \
         using T = std::underlying_type_t<type>;                                                    \
         return static_cast<type>(~static_cast<T>(key));                                            \
diff --git a/src/common/misc.cpp b/src/common/error.cpp
index 495385b9e..d4455e310 100644
--- a/src/common/misc.cpp
+++ b/src/common/error.cpp
@@ -10,7 +10,9 @@
 #include <cstring>
 #endif
 
-#include "common/common_funcs.h"
+#include "common/error.h"
+
+namespace Common {
 
 std::string NativeErrorToString(int e) {
 #ifdef _WIN32
@@ -50,3 +52,5 @@ std::string GetLastErrorMsg() {
     return NativeErrorToString(errno);
 #endif
 }
+
+} // namespace Common
diff --git a/src/common/error.h b/src/common/error.h
new file mode 100644
index 000000000..e084d4b0f
--- /dev/null
+++ b/src/common/error.h
@@ -0,0 +1,21 @@
+// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+namespace Common {
+
+// Generic function to get last error message.
+// Call directly after the command or use the error num.
+// This function might change the error code.
+// Defined in error.cpp.
+[[nodiscard]] std::string GetLastErrorMsg();
+
+// Like GetLastErrorMsg(), but passing an explicit error code.
+// Defined in error.cpp.
+[[nodiscard]] std::string NativeErrorToString(int e);
+
+} // namespace Common
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index fd3b639cd..0d2df80a8 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -54,7 +54,7 @@ void LogSettings() {
     log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
     log_setting("Renderer_UseAsynchronousGpuEmulation",
                 values.use_asynchronous_gpu_emulation.GetValue());
-    log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
+    log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
     log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
     log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
     log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@@ -136,7 +136,7 @@ void RestoreGlobalState(bool is_powered_on) {
     values.use_disk_shader_cache.SetGlobal(true);
     values.gpu_accuracy.SetGlobal(true);
     values.use_asynchronous_gpu_emulation.SetGlobal(true);
-    values.use_nvdec_emulation.SetGlobal(true);
+    values.nvdec_emulation.SetGlobal(true);
     values.accelerate_astc.SetGlobal(true);
     values.use_vsync.SetGlobal(true);
     values.shader_backend.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index ec4d381e8..b7195670b 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -48,6 +48,12 @@ enum class FullscreenMode : u32 {
     Exclusive = 1,
 };
 
+enum class NvdecEmulation : u32 {
+    Off = 0,
+    CPU = 1,
+    GPU = 2,
+};
+
 /** The BasicSetting class is a simple resource manager. It defines a label and default value
  * alongside the actual value of the setting for simpler and less-error prone use with frontend
  * configurations. Setting a default value and label is required, though subclasses may deviate from
@@ -466,7 +472,7 @@ struct Values {
     RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal,
                                             GPUAccuracy::Extreme, "gpu_accuracy"};
     Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
-    Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
+    Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
     Setting<bool> accelerate_astc{true, "accelerate_astc"};
     Setting<bool> use_vsync{true, "use_vsync"};
     BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"};
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index d2c1ac60d..946a1114d 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -2,7 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "common/common_funcs.h"
+#include <string>
+
+#include "common/error.h"
 #include "common/logging/log.h"
 #include "common/thread.h"
 #ifdef __APPLE__
@@ -21,8 +23,6 @@
 #include <unistd.h>
 #endif
 
-#include <string>
-
 #ifdef __FreeBSD__
 #define cpu_set_t cpuset_t
 #endif
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 87d47e2e5..7140d0db8 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -263,6 +263,8 @@ add_library(core STATIC
     hle/service/acc/acc_u0.h
     hle/service/acc/acc_u1.cpp
     hle/service/acc/acc_u1.h
+    hle/service/acc/async_context.cpp
+    hle/service/acc/async_context.h
     hle/service/acc/errors.h
     hle/service/acc/profile_manager.cpp
     hle/service/acc/profile_manager.h
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 7e195346b..77efcabf0 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -21,34 +21,25 @@ namespace Core {
 CpuManager::CpuManager(System& system_) : system{system_} {}
 CpuManager::~CpuManager() = default;
 
-void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
-    cpu_manager.RunThread(core);
+void CpuManager::ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager,
+                             std::size_t core) {
+    cpu_manager.RunThread(stop_token, core);
 }
 
 void CpuManager::Initialize() {
     running_mode = true;
     if (is_multicore) {
         for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-            core_data[core].host_thread =
-                std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
+            core_data[core].host_thread = std::jthread(ThreadStart, std::ref(*this), core);
         }
     } else {
-        core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0);
+        core_data[0].host_thread = std::jthread(ThreadStart, std::ref(*this), 0);
     }
 }
 
 void CpuManager::Shutdown() {
     running_mode = false;
     Pause(false);
-    if (is_multicore) {
-        for (auto& data : core_data) {
-            data.host_thread->join();
-            data.host_thread.reset();
-        }
-    } else {
-        core_data[0].host_thread->join();
-        core_data[0].host_thread.reset();
-    }
 }
 
 std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
@@ -317,7 +308,7 @@ void CpuManager::Pause(bool paused) {
     }
 }
 
-void CpuManager::RunThread(std::size_t core) {
+void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) {
     /// Initialization
     system.RegisterCoreThread(core);
     std::string name;
@@ -361,6 +352,10 @@ void CpuManager::RunThread(std::size_t core) {
             return;
         }
 
+        if (stop_token.stop_requested()) {
+            break;
+        }
+
         auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
         data.is_running = true;
         Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext());
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 140263b09..9d92d4af0 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -78,9 +78,9 @@ private:
     void SingleCoreRunSuspendThread();
     void SingleCorePause(bool paused);
 
-    static void ThreadStart(CpuManager& cpu_manager, std::size_t core);
+    static void ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, std::size_t core);
 
-    void RunThread(std::size_t core);
+    void RunThread(std::stop_token stop_token, std::size_t core);
 
     struct CoreData {
         std::shared_ptr<Common::Fiber> host_context;
@@ -89,7 +89,7 @@ private:
         std::atomic<bool> is_running;
         std::atomic<bool> is_paused;
         std::atomic<bool> initialized;
-        std::unique_ptr<std::thread> host_thread;
+        std::jthread host_thread;
     };
 
     std::atomic<bool> running_mode{};
diff --git a/src/core/file_sys/kernel_executable.h b/src/core/file_sys/kernel_executable.h
index 044c554d3..79ca82f8b 100644
--- a/src/core/file_sys/kernel_executable.h
+++ b/src/core/file_sys/kernel_executable.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <string>
 #include <vector>
 
 #include "common/common_funcs.h"
diff --git a/src/core/hle/api_version.h b/src/core/hle/api_version.h
index 43d5670a9..626e30753 100644
--- a/src/core/hle/api_version.h
+++ b/src/core/hle/api_version.h
@@ -28,13 +28,20 @@ constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 12.1.0-1.0";
 
 // Atmosphere version constants.
 
-constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 0;
-constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 19;
-constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 5;
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 1;
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 0;
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 0;
+
+constexpr u32 AtmosphereTargetFirmwareWithRevision(u8 major, u8 minor, u8 micro, u8 rev) {
+    return u32{major} << 24 | u32{minor} << 16 | u32{micro} << 8 | u32{rev};
+}
+
+constexpr u32 AtmosphereTargetFirmware(u8 major, u8 minor, u8 micro) {
+    return AtmosphereTargetFirmwareWithRevision(major, minor, micro, 0);
+}
 
 constexpr u32 GetTargetFirmware() {
-    return u32{HOS_VERSION_MAJOR} << 24 | u32{HOS_VERSION_MINOR} << 16 |
-           u32{HOS_VERSION_MICRO} << 8 | 0U;
+    return AtmosphereTargetFirmware(HOS_VERSION_MAJOR, HOS_VERSION_MINOR, HOS_VERSION_MICRO);
 }
 
 } // namespace HLE::ApiVersion
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 3a6db0b1c..901d43da9 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <functional>
 #include <memory>
 #include <string>
 #include <unordered_map>
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 882fc1492..6d9ec0a8a 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -23,6 +23,7 @@
 #include "core/hle/service/acc/acc_su.h"
 #include "core/hle/service/acc/acc_u0.h"
 #include "core/hle/service/acc/acc_u1.h"
+#include "core/hle/service/acc/async_context.h"
 #include "core/hle/service/acc/errors.h"
 #include "core/hle/service/acc/profile_manager.h"
 #include "core/hle/service/glue/arp.h"
@@ -454,22 +455,6 @@ public:
         : IProfileCommon{system_, "IProfileEditor", true, user_id_, profile_manager_} {}
 };
 
-class IAsyncContext final : public ServiceFramework<IAsyncContext> {
-public:
-    explicit IAsyncContext(Core::System& system_) : ServiceFramework{system_, "IAsyncContext"} {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetSystemEvent"},
-            {1, nullptr, "Cancel"},
-            {2, nullptr, "HasDone"},
-            {3, nullptr, "GetResult"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
 class ISessionObject final : public ServiceFramework<ISessionObject> {
 public:
     explicit ISessionObject(Core::System& system_, Common::UUID)
@@ -504,16 +489,44 @@ public:
     }
 };
 
+class EnsureTokenIdCacheAsyncInterface final : public IAsyncContext {
+public:
+    explicit EnsureTokenIdCacheAsyncInterface(Core::System& system_) : IAsyncContext{system_} {
+        MarkComplete();
+    }
+    ~EnsureTokenIdCacheAsyncInterface() = default;
+
+    void LoadIdTokenCache(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(ResultSuccess);
+    }
+
+protected:
+    bool IsComplete() const override {
+        return true;
+    }
+
+    void Cancel() override {}
+
+    ResultCode GetResult() const override {
+        return ResultSuccess;
+    }
+};
+
 class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
 public:
     explicit IManagerForApplication(Core::System& system_, Common::UUID user_id_)
-        : ServiceFramework{system_, "IManagerForApplication"}, user_id{user_id_} {
+        : ServiceFramework{system_, "IManagerForApplication"},
+          ensure_token_id{std::make_shared<EnsureTokenIdCacheAsyncInterface>(system)},
+          user_id{user_id_} {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, &IManagerForApplication::CheckAvailability, "CheckAvailability"},
             {1, &IManagerForApplication::GetAccountId, "GetAccountId"},
-            {2, nullptr, "EnsureIdTokenCacheAsync"},
-            {3, nullptr, "LoadIdTokenCache"},
+            {2, &IManagerForApplication::EnsureIdTokenCacheAsync, "EnsureIdTokenCacheAsync"},
+            {3, &IManagerForApplication::LoadIdTokenCache, "LoadIdTokenCache"},
             {130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"},
             {150, nullptr, "CreateAuthorizationRequest"},
             {160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"},
@@ -540,6 +553,20 @@ private:
         rb.PushRaw<u64>(user_id.GetNintendoID());
     }
 
+    void EnsureIdTokenCacheAsync(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+        rb.Push(ResultSuccess);
+        rb.PushIpcInterface(ensure_token_id);
+    }
+
+    void LoadIdTokenCache(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+        ensure_token_id->LoadIdTokenCache(ctx);
+    }
+
     void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_ACC, "(STUBBED) called");
 
@@ -562,6 +589,7 @@ private:
         rb.Push(ResultSuccess);
     }
 
+    std::shared_ptr<EnsureTokenIdCacheAsyncInterface> ensure_token_id{};
     Common::UUID user_id{Common::INVALID_UUID};
 };
 
diff --git a/src/core/hle/service/acc/async_context.cpp b/src/core/hle/service/acc/async_context.cpp
new file mode 100644
index 000000000..459323132
--- /dev/null
+++ b/src/core/hle/service/acc/async_context.cpp
@@ -0,0 +1,68 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/service/acc/async_context.h"
+
+namespace Service::Account {
+IAsyncContext::IAsyncContext(Core::System& system_)
+    : ServiceFramework{system_, "IAsyncContext"}, compeletion_event{system_.Kernel()} {
+
+    Kernel::KAutoObject::Create(std::addressof(compeletion_event));
+    compeletion_event.Initialize("IAsyncContext:CompletionEvent");
+
+    // clang-format off
+    static const FunctionInfo functions[] = {
+        {0, &IAsyncContext::GetSystemEvent, "GetSystemEvent"},
+        {1, &IAsyncContext::Cancel, "Cancel"},
+        {2, &IAsyncContext::HasDone, "HasDone"},
+        {3, &IAsyncContext::GetResult, "GetResult"},
+    };
+    // clang-format on
+
+    RegisterHandlers(functions);
+}
+
+void IAsyncContext::GetSystemEvent(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+
+    IPC::ResponseBuilder rb{ctx, 2, 1};
+    rb.Push(ResultSuccess);
+    rb.PushCopyObjects(compeletion_event.GetReadableEvent());
+}
+
+void IAsyncContext::Cancel(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+
+    Cancel();
+    MarkComplete();
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(ResultSuccess);
+}
+
+void IAsyncContext::HasDone(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+
+    is_complete.store(IsComplete());
+
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(ResultSuccess);
+    rb.Push(is_complete.load());
+}
+
+void IAsyncContext::GetResult(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(GetResult());
+}
+
+void IAsyncContext::MarkComplete() {
+    is_complete.store(true);
+    compeletion_event.GetWritableEvent().Signal();
+}
+
+} // namespace Service::Account
diff --git a/src/core/hle/service/acc/async_context.h b/src/core/hle/service/acc/async_context.h
new file mode 100644
index 000000000..c694b4946
--- /dev/null
+++ b/src/core/hle/service/acc/async_context.h
@@ -0,0 +1,37 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include "core/hle/kernel/k_event.h"
+#include "core/hle/service/service.h"
+
+namespace Core {
+class System;
+}
+
+namespace Service::Account {
+
+class IAsyncContext : public ServiceFramework<IAsyncContext> {
+public:
+    explicit IAsyncContext(Core::System& system_);
+
+    void GetSystemEvent(Kernel::HLERequestContext& ctx);
+    void Cancel(Kernel::HLERequestContext& ctx);
+    void HasDone(Kernel::HLERequestContext& ctx);
+    void GetResult(Kernel::HLERequestContext& ctx);
+
+protected:
+    virtual bool IsComplete() const = 0;
+    virtual void Cancel() = 0;
+    virtual ResultCode GetResult() const = 0;
+
+    void MarkComplete();
+
+    std::atomic<bool> is_complete{false};
+    Kernel::KEvent compeletion_event;
+};
+
+} // namespace Service::Account
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index a538f82e3..c3ac73131 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1270,7 +1270,8 @@ void ILibraryAppletCreator::CreateHandleStorage(Kernel::HLERequestContext& ctx)
 IApplicationFunctions::IApplicationFunctions(Core::System& system_)
     : ServiceFramework{system_, "IApplicationFunctions"}, gpu_error_detected_event{system.Kernel()},
       friend_invitation_storage_channel_event{system.Kernel()},
-      health_warning_disappeared_system_event{system.Kernel()} {
+      notification_storage_channel_event{system.Kernel()}, health_warning_disappeared_system_event{
+                                                               system.Kernel()} {
     // clang-format off
     static const FunctionInfo functions[] = {
         {1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"},
@@ -1322,7 +1323,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
         {131, nullptr, "SetDelayTimeToAbortOnGpuError"},
         {140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"},
         {141, &IApplicationFunctions::TryPopFromFriendInvitationStorageChannel, "TryPopFromFriendInvitationStorageChannel"},
-        {150, nullptr, "GetNotificationStorageChannelEvent"},
+        {150, &IApplicationFunctions::GetNotificationStorageChannelEvent, "GetNotificationStorageChannelEvent"},
         {151, nullptr, "TryPopFromNotificationStorageChannel"},
         {160, &IApplicationFunctions::GetHealthWarningDisappearedSystemEvent, "GetHealthWarningDisappearedSystemEvent"},
         {170, nullptr, "SetHdcpAuthenticationActivated"},
@@ -1340,11 +1341,14 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
 
     Kernel::KAutoObject::Create(std::addressof(gpu_error_detected_event));
     Kernel::KAutoObject::Create(std::addressof(friend_invitation_storage_channel_event));
+    Kernel::KAutoObject::Create(std::addressof(notification_storage_channel_event));
     Kernel::KAutoObject::Create(std::addressof(health_warning_disappeared_system_event));
 
     gpu_error_detected_event.Initialize("IApplicationFunctions:GpuErrorDetectedSystemEvent");
     friend_invitation_storage_channel_event.Initialize(
         "IApplicationFunctions:FriendInvitationStorageChannelEvent");
+    notification_storage_channel_event.Initialize(
+        "IApplicationFunctions:NotificationStorageChannelEvent");
     health_warning_disappeared_system_event.Initialize(
         "IApplicationFunctions:HealthWarningDisappearedSystemEvent");
 }
@@ -1762,6 +1766,14 @@ void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel(
     rb.Push(ERR_NO_DATA_IN_CHANNEL);
 }
 
+void IApplicationFunctions::GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_AM, "called");
+
+    IPC::ResponseBuilder rb{ctx, 2, 1};
+    rb.Push(ResultSuccess);
+    rb.PushCopyObjects(notification_storage_channel_event.GetReadableEvent());
+}
+
 void IApplicationFunctions::GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_AM, "called");
 
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 184030a8e..c13aa5787 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -295,6 +295,7 @@ private:
     void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx);
     void GetFriendInvitationStorageChannelEvent(Kernel::HLERequestContext& ctx);
     void TryPopFromFriendInvitationStorageChannel(Kernel::HLERequestContext& ctx);
+    void GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx);
     void GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx);
 
     bool launch_popped_application_specific = false;
@@ -302,6 +303,7 @@ private:
     s32 previous_program_index{-1};
     Kernel::KEvent gpu_error_detected_event;
     Kernel::KEvent friend_invitation_storage_channel_event;
+    Kernel::KEvent notification_storage_channel_event;
     Kernel::KEvent health_warning_disappeared_system_event;
 };
 
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 4a9b13e45..c8d65f328 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -97,14 +97,24 @@ ResultCode VfsDirectoryServiceWrapper::DeleteFile(const std::string& path_) cons
 
 ResultCode VfsDirectoryServiceWrapper::CreateDirectory(const std::string& path_) const {
     std::string path(Common::FS::SanitizePath(path_));
-    auto dir = GetDirectoryRelativeWrapped(backing, Common::FS::GetParentPath(path));
-    if (dir == nullptr || Common::FS::GetFilename(Common::FS::GetParentPath(path)).empty()) {
-        dir = backing;
-    }
-    auto new_dir = dir->CreateSubdirectory(Common::FS::GetFilename(path));
-    if (new_dir == nullptr) {
-        // TODO(DarkLordZach): Find a better error code for this
-        return ResultUnknown;
+
+    // NOTE: This is inaccurate behavior. CreateDirectory is not recursive.
+    // CreateDirectory should return PathNotFound if the parent directory does not exist.
+    // This is here temporarily in order to have UMM "work" in the meantime.
+    // TODO (Morph): Remove this when a hardware test verifies the correct behavior.
+    const auto components = Common::FS::SplitPathComponents(path);
+    std::string relative_path;
+    for (const auto& component : components) {
+        // Skip empty path components
+        if (component.empty()) {
+            continue;
+        }
+        relative_path = Common::FS::SanitizePath(relative_path + '/' + component);
+        auto new_dir = backing->CreateSubdirectory(relative_path);
+        if (new_dir == nullptr) {
+            // TODO(DarkLordZach): Find a better error code for this
+            return ResultUnknown;
+        }
     }
     return ResultSuccess;
 }
diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h
index ef2becefd..8e9b40c0a 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -15,6 +15,20 @@
 namespace Service::HID {
 class Controller_Touchscreen final : public ControllerBase {
 public:
+    enum class TouchScreenModeForNx : u8 {
+        UseSystemSetting,
+        Finger,
+        Heat2,
+    };
+
+    struct TouchScreenConfigurationForNx {
+        TouchScreenModeForNx mode;
+        INSERT_PADDING_BYTES_NOINIT(0x7);
+        INSERT_PADDING_BYTES_NOINIT(0xF); // Reserved
+    };
+    static_assert(sizeof(TouchScreenConfigurationForNx) == 0x17,
+                  "TouchScreenConfigurationForNx is an invalid size");
+
     explicit Controller_Touchscreen(Core::System& system_);
     ~Controller_Touchscreen() override;
 
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index b8b80570d..a1707a72a 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -331,7 +331,7 @@ Hid::Hid(Core::System& system_)
         {529, nullptr, "SetDisallowedPalmaConnection"},
         {1000, &Hid::SetNpadCommunicationMode, "SetNpadCommunicationMode"},
         {1001, &Hid::GetNpadCommunicationMode, "GetNpadCommunicationMode"},
-        {1002, nullptr, "SetTouchScreenConfiguration"},
+        {1002, &Hid::SetTouchScreenConfiguration, "SetTouchScreenConfiguration"},
         {1003, nullptr, "IsFirmwareUpdateNeededForNotification"},
         {2000, nullptr, "ActivateDigitizer"},
     };
@@ -1631,6 +1631,18 @@ void Hid::GetNpadCommunicationMode(Kernel::HLERequestContext& ctx) {
                     .GetNpadCommunicationMode());
 }
 
+void Hid::SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const auto touchscreen_mode{rp.PopRaw<Controller_Touchscreen::TouchScreenConfigurationForNx>()};
+    const auto applet_resource_user_id{rp.Pop<u64>()};
+
+    LOG_WARNING(Service_HID, "(STUBBED) called, touchscreen_mode={}, applet_resource_user_id={}",
+                touchscreen_mode.mode, applet_resource_user_id);
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(ResultSuccess);
+}
+
 class HidDbg final : public ServiceFramework<HidDbg> {
 public:
     explicit HidDbg(Core::System& system_) : ServiceFramework{system_, "hid:dbg"} {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 9c5c7f252..b1fe75e94 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -159,6 +159,7 @@ private:
     void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
     void SetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
     void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
+    void SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx);
 
     enum class VibrationDeviceType : u32 {
         Unknown = 0,
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index ce6065db2..a33e47d0b 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
                         u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
                         const Common::Rectangle<int>& crop_rect) {
-    VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
+    const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
     LOG_TRACE(Service,
               "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
               addr, offset, width, height, stride, format);
 
-    using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
-    const Tegra::FramebufferConfig framebuffer{
-        addr,      offset,   width, height, stride, static_cast<PixelFormat>(format),
-        transform, crop_rect};
+    const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format);
+    const Tegra::FramebufferConfig framebuffer{addr,   offset,       width,     height,
+                                               stride, pixel_format, transform, crop_rect};
 
     system.GetPerfStats().EndSystemFrame();
     system.GPU().SwapBuffers(&framebuffer);
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 59ddf6298..b4c3a6099 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -9,17 +9,20 @@
 #include "core/core.h"
 #include "core/hle/kernel/k_writable_event.h"
 #include "core/hle/kernel/kernel.h"
+#include "core/hle/service/kernel_helpers.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 
 namespace Service::NVFlinger {
 
-BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_)
-    : id(id_), layer_id(layer_id_), buffer_wait_event{kernel} {
-    Kernel::KAutoObject::Create(std::addressof(buffer_wait_event));
-    buffer_wait_event.Initialize("BufferQueue:WaitEvent");
+BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
+                         KernelHelpers::ServiceContext& service_context_)
+    : id(id_), layer_id(layer_id_), service_context{service_context_} {
+    buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
 }
 
-BufferQueue::~BufferQueue() = default;
+BufferQueue::~BufferQueue() {
+    service_context.CloseEvent(buffer_wait_event);
+}
 
 void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) {
     ASSERT(slot < buffer_slots);
@@ -41,7 +44,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
         .multi_fence = {},
     };
 
-    buffer_wait_event.GetWritableEvent().Signal();
+    buffer_wait_event->GetWritableEvent().Signal();
 }
 
 std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
@@ -119,7 +122,7 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult
     }
     free_buffers_condition.notify_one();
 
-    buffer_wait_event.GetWritableEvent().Signal();
+    buffer_wait_event->GetWritableEvent().Signal();
 }
 
 std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
@@ -154,7 +157,7 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
     }
     free_buffers_condition.notify_one();
 
-    buffer_wait_event.GetWritableEvent().Signal();
+    buffer_wait_event->GetWritableEvent().Signal();
 }
 
 void BufferQueue::Connect() {
@@ -169,7 +172,7 @@ void BufferQueue::Disconnect() {
         std::unique_lock lock{queue_sequence_mutex};
         queue_sequence.clear();
     }
-    buffer_wait_event.GetWritableEvent().Signal();
+    buffer_wait_event->GetWritableEvent().Signal();
     is_connect = false;
     free_buffers_condition.notify_one();
 }
@@ -189,11 +192,11 @@ u32 BufferQueue::Query(QueryType type) {
 }
 
 Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() {
-    return buffer_wait_event.GetWritableEvent();
+    return buffer_wait_event->GetWritableEvent();
 }
 
 Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() {
-    return buffer_wait_event.GetReadableEvent();
+    return buffer_wait_event->GetReadableEvent();
 }
 
 } // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index 61e337ac5..78de3f354 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -24,6 +24,10 @@ class KReadableEvent;
 class KWritableEvent;
 } // namespace Kernel
 
+namespace Service::KernelHelpers {
+class ServiceContext;
+} // namespace Service::KernelHelpers
+
 namespace Service::NVFlinger {
 
 constexpr u32 buffer_slots = 0x40;
@@ -38,7 +42,9 @@ struct IGBPBuffer {
     u32_le index;
     INSERT_PADDING_WORDS(3);
     u32_le gpu_buffer_id;
-    INSERT_PADDING_WORDS(17);
+    INSERT_PADDING_WORDS(6);
+    u32_le external_format;
+    INSERT_PADDING_WORDS(10);
     u32_le nvmap_handle;
     u32_le offset;
     INSERT_PADDING_WORDS(60);
@@ -54,7 +60,8 @@ public:
         NativeWindowFormat = 2,
     };
 
-    explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_);
+    explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
+                         KernelHelpers::ServiceContext& service_context_);
     ~BufferQueue();
 
     enum class BufferTransformFlags : u32 {
@@ -130,12 +137,14 @@ private:
     std::list<u32> free_buffers;
     std::array<Buffer, buffer_slots> buffers;
     std::list<u32> queue_sequence;
-    Kernel::KEvent buffer_wait_event;
+    Kernel::KEvent* buffer_wait_event{};
 
     std::mutex free_buffers_mutex;
     std::condition_variable free_buffers_condition;
 
     std::mutex queue_sequence_mutex;
+
+    KernelHelpers::ServiceContext& service_context;
 };
 
 } // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 941748970..3ead813b0 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -61,12 +61,13 @@ void NVFlinger::SplitVSync() {
     }
 }
 
-NVFlinger::NVFlinger(Core::System& system_) : system(system_) {
-    displays.emplace_back(0, "Default", system);
-    displays.emplace_back(1, "External", system);
-    displays.emplace_back(2, "Edid", system);
-    displays.emplace_back(3, "Internal", system);
-    displays.emplace_back(4, "Null", system);
+NVFlinger::NVFlinger(Core::System& system_)
+    : system(system_), service_context(system_, "nvflinger") {
+    displays.emplace_back(0, "Default", service_context, system);
+    displays.emplace_back(1, "External", service_context, system);
+    displays.emplace_back(2, "Edid", service_context, system);
+    displays.emplace_back(3, "Internal", service_context, system);
+    displays.emplace_back(4, "Null", service_context, system);
     guard = std::make_shared<std::mutex>();
 
     // Schedule the screen composition events
@@ -146,7 +147,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
 void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) {
     const u32 buffer_queue_id = next_buffer_queue_id++;
     buffer_queues.emplace_back(
-        std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id));
+        std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id, service_context));
     display.CreateLayer(layer_id, *buffer_queues.back());
 }
 
@@ -297,7 +298,7 @@ void NVFlinger::Compose() {
         auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
         ASSERT(nvdisp);
 
-        nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
+        nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format,
                      igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
                      buffer->get().transform, buffer->get().crop_rect);
 
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index d80fd07ef..6d84cafb4 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -15,6 +15,7 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "core/hle/service/kernel_helpers.h"
 
 namespace Common {
 class Event;
@@ -135,6 +136,8 @@ private:
     std::unique_ptr<std::thread> vsync_thread;
     std::unique_ptr<Common::Event> wait_event;
     std::atomic<bool> is_running{};
+
+    KernelHelpers::ServiceContext service_context;
 };
 
 } // namespace Service::NVFlinger
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
index 0dd342dbf..b7705c02a 100644
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -12,18 +12,21 @@
 #include "core/hle/kernel/k_event.h"
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/k_writable_event.h"
+#include "core/hle/service/kernel_helpers.h"
 #include "core/hle/service/vi/display/vi_display.h"
 #include "core/hle/service/vi/layer/vi_layer.h"
 
 namespace Service::VI {
 
-Display::Display(u64 id, std::string name_, Core::System& system)
-    : display_id{id}, name{std::move(name_)}, vsync_event{system.Kernel()} {
-    Kernel::KAutoObject::Create(std::addressof(vsync_event));
-    vsync_event.Initialize(fmt::format("Display VSync Event {}", id));
+Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_,
+                 Core::System& system_)
+    : display_id{id}, name{std::move(name_)}, service_context{service_context_} {
+    vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id));
 }
 
-Display::~Display() = default;
+Display::~Display() {
+    service_context.CloseEvent(vsync_event);
+}
 
 Layer& Display::GetLayer(std::size_t index) {
     return *layers.at(index);
@@ -34,11 +37,11 @@ const Layer& Display::GetLayer(std::size_t index) const {
 }
 
 Kernel::KReadableEvent& Display::GetVSyncEvent() {
-    return vsync_event.GetReadableEvent();
+    return vsync_event->GetReadableEvent();
 }
 
 void Display::SignalVSyncEvent() {
-    vsync_event.GetWritableEvent().Signal();
+    vsync_event->GetWritableEvent().Signal();
 }
 
 void Display::CreateLayer(u64 layer_id, NVFlinger::BufferQueue& buffer_queue) {
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
index 166f2a4cc..0979fc421 100644
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -18,6 +18,9 @@ class KEvent;
 namespace Service::NVFlinger {
 class BufferQueue;
 }
+namespace Service::KernelHelpers {
+class ServiceContext;
+} // namespace Service::KernelHelpers
 
 namespace Service::VI {
 
@@ -31,10 +34,13 @@ class Display {
 public:
     /// Constructs a display with a given unique ID and name.
     ///
-    /// @param id   The unique ID for this display.
+    /// @param id The unique ID for this display.
+    /// @param service_context_ The ServiceContext for the owning service.
     /// @param name_ The name for this display.
+    /// @param system_ The global system instance.
     ///
-    Display(u64 id, std::string name_, Core::System& system);
+    Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_,
+            Core::System& system_);
     ~Display();
 
     /// Gets the unique ID assigned to this display.
@@ -98,9 +104,10 @@ public:
 private:
     u64 display_id;
     std::string name;
+    KernelHelpers::ServiceContext& service_context;
 
     std::vector<std::shared_ptr<Layer>> layers;
-    Kernel::KEvent vsync_event;
+    Kernel::KEvent* vsync_event{};
 };
 
 } // namespace Service::VI
diff --git a/src/core/network/network.cpp b/src/core/network/network.cpp
index 4732d4485..72eea52f0 100644
--- a/src/core/network/network.cpp
+++ b/src/core/network/network.cpp
@@ -7,7 +7,8 @@
 #include <limits>
 #include <utility>
 #include <vector>
-#include "common/common_funcs.h"
+
+#include "common/error.h"
 
 #ifdef _WIN32
 #include <winsock2.h>
@@ -223,7 +224,7 @@ Errno GetAndLogLastError() {
     if (err == Errno::AGAIN) {
         return err;
     }
-    LOG_ERROR(Network, "Socket operation error: {}", NativeErrorToString(e));
+    LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
     return err;
 }
 
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 5a8cfd301..1f1607998 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
     return "Unknown";
 }
 
+static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) {
+    switch (backend) {
+    case Settings::NvdecEmulation::Off:
+        return "Off";
+    case Settings::NvdecEmulation::CPU:
+        return "CPU";
+    case Settings::NvdecEmulation::GPU:
+        return "GPU";
+    }
+    return "Unknown";
+}
+
 u64 GetTelemetryId() {
     u64 telemetry_id{};
     const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
@@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
              TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
     AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
              Settings::values.use_asynchronous_gpu_emulation.GetValue());
-    AddField(field_type, "Renderer_UseNvdecEmulation",
-             Settings::values.use_nvdec_emulation.GetValue());
+    AddField(field_type, "Renderer_NvdecEmulation",
+             TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
     AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
     AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
     AddField(field_type, "Renderer_ShaderBackend",
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index a982dd8a2..cd285e2c8 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -11,6 +11,8 @@
 
 namespace Shader::Backend::GLSL {
 namespace {
+constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"};
+
 void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
     IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
     if (!in_bounds) {
@@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
     ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
     SetInBoundsFlag(ctx, inst);
 }
+
+std::string_view BallotIndex(EmitContext& ctx) {
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        return ".x";
+    }
+    return "[gl_SubGroupInvocationARB>>5]";
+}
+
+std::string GetMask(EmitContext& ctx, std::string_view mask) {
+    const auto ballot_index{BallotIndex(ctx)};
+    return fmt::format("uint(uvec2({}){})", mask, ballot_index);
+}
 } // Anonymous namespace
 
 void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
+    ctx.AddU32("{}={}&31u;", inst, THREAD_ID);
 }
 
 void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
     if (!ctx.profile.warp_size_potentially_larger_than_guest) {
         ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
-    } else {
-        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
-        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
-        ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
+        return;
     }
+    const auto ballot_index{BallotIndex(ctx)};
+    const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+    const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+    ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
 }
 
 void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
     if (!ctx.profile.warp_size_potentially_larger_than_guest) {
         ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
-    } else {
-        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
-        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
-        ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
+        return;
     }
+    const auto ballot_index{BallotIndex(ctx)};
+    const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+    const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+    ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
 }
 
 void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
     if (!ctx.profile.warp_size_potentially_larger_than_guest) {
         ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
-    } else {
-        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
-        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
-        const auto value{fmt::format("({}^{})", ballot, active_mask)};
-        ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
+        return;
     }
+    const auto ballot_index{BallotIndex(ctx)};
+    const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+    const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+    const auto value{fmt::format("({}^{})", ballot, active_mask)};
+    ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
 }
 
 void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
-        ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
-    } else {
-        ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
-    }
+    const auto ballot_index{BallotIndex(ctx)};
+    ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index);
 }
 
 void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB"));
 }
 
 void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB"));
 }
 
 void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB"));
 }
 
 void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB"));
 }
 
 void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB"));
 }
 
 void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
-                      std::string_view index, std::string_view clamp,
-                      std::string_view segmentation_mask) {
+                      std::string_view index, std::string_view clamp, std::string_view seg_mask) {
     if (ctx.profile.support_gl_warp_intrinsics) {
-        UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
+        UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask);
         return;
     }
-    const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
-    const auto thread_id{"gl_SubGroupInvocationARB"};
-    const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
-    const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
+    const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+    const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+    const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)};
+    const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+    const auto not_seg_mask{fmt::format("(~{})", seg_mask)};
+    const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)};
+    const auto max_thread_id{
+        ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)};
 
-    const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
+    const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)};
     const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
@@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
 }
 
 void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
-                   std::string_view clamp, std::string_view segmentation_mask) {
+                   std::string_view clamp, std::string_view seg_mask) {
     if (ctx.profile.support_gl_warp_intrinsics) {
-        UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
+        UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask);
         return;
     }
-    const auto thread_id{"gl_SubGroupInvocationARB"};
-    const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
-    const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
+    const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+    const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+    const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+    const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
+    const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)};
     ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
     ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
 }
 
 void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
-                     std::string_view index, std::string_view clamp,
-                     std::string_view segmentation_mask) {
+                     std::string_view index, std::string_view clamp, std::string_view seg_mask) {
     if (ctx.profile.support_gl_warp_intrinsics) {
-        UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
+        UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask);
         return;
     }
-    const auto thread_id{"gl_SubGroupInvocationARB"};
-    const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
-    const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
+    const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+    const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+    const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+    const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
+    const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)};
     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
     ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
@@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
 
 void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
                           std::string_view index, std::string_view clamp,
-                          std::string_view segmentation_mask) {
+                          std::string_view seg_mask) {
     if (ctx.profile.support_gl_warp_intrinsics) {
-        UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
+        UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask);
         return;
     }
-    const auto thread_id{"gl_SubGroupInvocationARB"};
-    const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
-    const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
+    const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+    const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+    const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+    const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
+    const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)};
     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
     ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 2d29d8c14..2885e6799 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -15,6 +15,8 @@
 
 namespace Shader::Backend::SPIRV {
 namespace {
+constexpr size_t NUM_FIXEDFNCTEXTURE = 10;
+
 enum class Operation {
     Increment,
     Decrement,
@@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
         return pointer_type;
     }
 }
+
+size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations,
+                              size_t start_offset) {
+    for (size_t location = start_offset; location < used_locations.size(); ++location) {
+        if (!used_locations.test(location)) {
+            return location;
+        }
+    }
+    throw RuntimeError("Unable to get an unused location for legacy attribute");
+}
 } // Anonymous namespace
 
 void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
         loads[IR::Attribute::TessellationEvaluationPointV]) {
         tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
     }
+    std::bitset<IR::NUM_GENERICS> used_locations{};
     for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
         const AttributeType input_type{runtime_info.generic_input_types[index]};
         if (!runtime_info.previous_stage_stores.Generic(index)) {
@@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
         if (input_type == AttributeType::Disabled) {
             continue;
         }
+        used_locations.set(index);
         const Id type{GetAttributeType(*this, input_type)};
         const Id id{DefineInput(*this, type, true)};
         Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
@@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {
             break;
         }
     }
+    size_t previous_unused_location = 0;
+    if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+        const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
+        previous_unused_location = location;
+        used_locations.set(location);
+        const Id id{DefineInput(*this, F32[4], true)};
+        Decorate(id, spv::Decoration::Location, location);
+        input_front_color = id;
+    }
+    for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
+        if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+            const size_t location =
+                FindNextUnusedLocation(used_locations, previous_unused_location);
+            previous_unused_location = location;
+            used_locations.set(location);
+            const Id id{DefineInput(*this, F32[4], true)};
+            Decorate(id, spv::Decoration::Location, location);
+            input_fixed_fnc_textures[index] = id;
+        }
+    }
     if (stage == Stage::TessellationEval) {
         for (size_t index = 0; index < info.uses_patches.size(); ++index) {
             if (!info.uses_patches[index]) {
@@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
         viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
                                      spv::BuiltIn::ViewportMaskNV);
     }
+    std::bitset<IR::NUM_GENERICS> used_locations{};
     for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
         if (info.stores.Generic(index)) {
             DefineGenericOutput(*this, index, invocations);
+            used_locations.set(index);
+        }
+    }
+    size_t previous_unused_location = 0;
+    if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+        const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
+        previous_unused_location = location;
+        used_locations.set(location);
+        const Id id{DefineOutput(*this, F32[4], invocations)};
+        Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+        output_front_color = id;
+    }
+    for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
+        if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+            const size_t location =
+                FindNextUnusedLocation(used_locations, previous_unused_location);
+            previous_unused_location = location;
+            used_locations.set(location);
+            const Id id{DefineOutput(*this, F32[4], invocations)};
+            Decorate(id, spv::Decoration::Location, location);
+            output_fixed_fnc_textures[index] = id;
         }
     }
     switch (stage) {
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index e277bc358..847d0c0e6 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -268,10 +268,14 @@ public:
     Id write_global_func_u32x4{};
 
     Id input_position{};
+    Id input_front_color{};
+    std::array<Id, 10> input_fixed_fnc_textures{};
     std::array<Id, 32> input_generics{};
 
     Id output_point_size{};
     Id output_position{};
+    Id output_front_color{};
+    std::array<Id, 10> output_fixed_fnc_textures{};
     std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
 
     Id output_tess_level_outer{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 9e54a17ee..68f360b3c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
     }
 }
 
+bool IsFixedFncTexture(IR::Attribute attribute) {
+    return attribute >= IR::Attribute::FixedFncTexture0S &&
+           attribute <= IR::Attribute::FixedFncTexture9Q;
+}
+
+u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) {
+    if (!IsFixedFncTexture(attribute)) {
+        throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
+    }
+    return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u;
+}
+
+u32 FixedFncTextureAttributeElement(IR::Attribute attribute) {
+    if (!IsFixedFncTexture(attribute)) {
+        throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
+    }
+    return static_cast<u32>(attribute) % 4u;
+}
+
 template <typename... Args>
 Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
     if (ctx.stage == Stage::TessellationControl) {
@@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
             return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
         }
     }
+    if (IsFixedFncTexture(attr)) {
+        const u32 index{FixedFncTextureAttributeIndex(attr)};
+        const u32 element{FixedFncTextureAttributeElement(attr)};
+        const Id element_id{ctx.Const(element)};
+        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index],
+                                 element_id);
+    }
     switch (attr) {
     case IR::Attribute::PointSize:
         return ctx.output_point_size;
@@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
         const Id element_id{ctx.Const(element)};
         return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
     }
+    case IR::Attribute::ColorFrontDiffuseR:
+    case IR::Attribute::ColorFrontDiffuseG:
+    case IR::Attribute::ColorFrontDiffuseB:
+    case IR::Attribute::ColorFrontDiffuseA: {
+        const u32 element{static_cast<u32>(attr) % 4};
+        const Id element_id{ctx.Const(element)};
+        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id);
+    }
     case IR::Attribute::ClipDistance0:
     case IR::Attribute::ClipDistance1:
     case IR::Attribute::ClipDistance2:
@@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
         const Id value{ctx.OpLoad(type->id, pointer)};
         return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
     }
+    if (IsFixedFncTexture(attr)) {
+        const u32 index{FixedFncTextureAttributeIndex(attr)};
+        const Id attr_id{ctx.input_fixed_fnc_textures[index]};
+        const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))};
+        return ctx.OpLoad(ctx.F32[1], attr_ptr);
+    }
     switch (attr) {
     case IR::Attribute::PrimitiveId:
         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
@@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
     case IR::Attribute::PositionW:
         return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
                                                   ctx.Const(element)));
+    case IR::Attribute::ColorFrontDiffuseR:
+    case IR::Attribute::ColorFrontDiffuseG:
+    case IR::Attribute::ColorFrontDiffuseB:
+    case IR::Attribute::ColorFrontDiffuseA: {
+        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color,
+                                                  ctx.Const(element)));
+    }
     case IR::Attribute::InstanceId:
         if (ctx.profile.support_vertex_instance_id) {
             return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 78b1e1ba7..cef52c56e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -7,8 +7,13 @@
 
 namespace Shader::Backend::SPIRV {
 namespace {
+Id GetThreadId(EmitContext& ctx) {
+    return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
+}
+
 Id WarpExtract(EmitContext& ctx, Id value) {
-    const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
     return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
 }
 
@@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
     return ctx.OpSelect(ctx.U32[1], in_range,
                         ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
 }
+
+Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
+    const Id thirty_two{ctx.Const(32u)};
+    const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
+    const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
+    return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
+}
 } // Anonymous namespace
 
 Id EmitLaneId(EmitContext& ctx) {
-    const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id id{GetThreadId(ctx)};
     if (!ctx.profile.warp_size_potentially_larger_than_guest) {
         return id;
     }
@@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
 Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                     Id segmentation_mask) {
     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
-    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        const Id thirty_two{ctx.Const(32u)};
+        const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
+        const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
+        const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
+        index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
+        clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
+    }
     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
     const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
 
@@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
 
 Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                  Id segmentation_mask) {
-    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        clamp = GetUpperClamp(ctx, thread_id, clamp);
+    }
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
     const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
 
 Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                    Id segmentation_mask) {
-    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        clamp = GetUpperClamp(ctx, thread_id, clamp);
+    }
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
     const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
 
 Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                         Id segmentation_mask) {
-    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        clamp = GetUpperClamp(ctx, thread_id, clamp);
+    }
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
     const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2f6cdd216..269db21a5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -231,6 +231,7 @@ endif()
 
 target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
 target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
+target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
 
 add_dependencies(video_core host_shaders)
 target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index f798a0053..61966cbfe 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -5,6 +5,7 @@
 #include <fstream>
 #include <vector>
 #include "common/assert.h"
+#include "common/settings.h"
 #include "video_core/command_classes/codecs/codec.h"
 #include "video_core/command_classes/codecs/h264.h"
 #include "video_core/command_classes/codecs/vp9.h"
@@ -16,108 +17,146 @@ extern "C" {
 }
 
 namespace Tegra {
-#if defined(LIBVA_FOUND)
-// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
 namespace {
-constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
-    "i915",
-    "amdgpu",
-};
+constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
+constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
+
+void AVPacketDeleter(AVPacket* ptr) {
+    av_packet_free(&ptr);
+}
 
-AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
+using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
+
+AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
     for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
-        if (*p == AV_PIX_FMT_VAAPI) {
-            return AV_PIX_FMT_VAAPI;
+        if (*p == av_codec_ctx->pix_fmt) {
+            return av_codec_ctx->pix_fmt;
         }
     }
     LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
-    return *pix_fmts;
+    av_buffer_unref(&av_codec_ctx->hw_device_ctx);
+    av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
+    return PREFERRED_CPU_FMT;
+}
+} // namespace
+
+void AVFrameDeleter(AVFrame* ptr) {
+    av_frame_free(&ptr);
 }
 
-bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
+Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
+    : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
+      vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
+
+Codec::~Codec() {
+    if (!initialized) {
+        return;
+    }
+    // Free libav memory
+    avcodec_free_context(&av_codec_ctx);
+    av_buffer_unref(&av_gpu_decoder);
+}
+
+bool Codec::CreateGpuAvDevice() {
+#if defined(LIBVA_FOUND)
+    static constexpr std::array<const char*, 3> VAAPI_DRIVERS = {
+        "i915",
+        "iHD",
+        "amdgpu",
+    };
     AVDictionary* hwdevice_options = nullptr;
     av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
     for (const auto& driver : VAAPI_DRIVERS) {
         av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
-        const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
+        const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
                                                           nullptr, hwdevice_options, 0);
         if (hwdevice_error >= 0) {
             LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
             av_dict_free(&hwdevice_options);
+            av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
             return true;
         }
         LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
     }
     LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
     av_dict_free(&hwdevice_options);
-    return false;
-}
-} // namespace
 #endif
-
-void AVFrameDeleter(AVFrame* ptr) {
-    av_frame_free(&ptr);
+    static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
+    static constexpr std::array GPU_DECODER_TYPES{
+        AV_HWDEVICE_TYPE_CUDA,
+#ifdef _WIN32
+        AV_HWDEVICE_TYPE_D3D11VA,
+#else
+        AV_HWDEVICE_TYPE_VDPAU,
+#endif
+    };
+    for (const auto& type : GPU_DECODER_TYPES) {
+        const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
+        if (hwdevice_res < 0) {
+            LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
+                      av_hwdevice_get_type_name(type), hwdevice_res);
+            continue;
+        }
+        for (int i = 0;; i++) {
+            const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
+            if (!config) {
+                LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
+                          av_codec->name, av_hwdevice_get_type_name(type));
+                break;
+            }
+            if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
+                av_codec_ctx->pix_fmt = config->pix_fmt;
+                LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
+                return true;
+            }
+        }
+    }
+    return false;
 }
 
-Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
-    : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
-      vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
-
-Codec::~Codec() {
-    if (!initialized) {
-        return;
-    }
-    // Free libav memory
-    avcodec_send_packet(av_codec_ctx, nullptr);
-    AVFrame* av_frame = av_frame_alloc();
-    avcodec_receive_frame(av_codec_ctx, av_frame);
-    avcodec_flush_buffers(av_codec_ctx);
-    av_frame_free(&av_frame);
-    avcodec_close(av_codec_ctx);
-    av_buffer_unref(&av_hw_device);
+void Codec::InitializeAvCodecContext() {
+    av_codec_ctx = avcodec_alloc_context3(av_codec);
+    av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
 }
 
-void Codec::InitializeHwdec() {
-    // Prioritize integrated GPU to mitigate bandwidth bottlenecks
-#if defined(LIBVA_FOUND)
-    if (CreateVaapiHwdevice(&av_hw_device)) {
-        const auto hw_device_ctx = av_buffer_ref(av_hw_device);
-        ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
-        av_codec_ctx->hw_device_ctx = hw_device_ctx;
-        av_codec_ctx->get_format = GetHwFormat;
+void Codec::InitializeGpuDecoder() {
+    if (!CreateGpuAvDevice()) {
+        av_buffer_unref(&av_gpu_decoder);
         return;
     }
-#endif
-    // TODO more GPU accelerated decoders
+    auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
+    ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
+    av_codec_ctx->hw_device_ctx = hw_device_ctx;
+    av_codec_ctx->get_format = GetGpuFormat;
 }
 
 void Codec::Initialize() {
-    AVCodecID codec;
-    switch (current_codec) {
-    case NvdecCommon::VideoCodec::H264:
-        codec = AV_CODEC_ID_H264;
-        break;
-    case NvdecCommon::VideoCodec::Vp9:
-        codec = AV_CODEC_ID_VP9;
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
+    const AVCodecID codec = [&] {
+        switch (current_codec) {
+        case NvdecCommon::VideoCodec::H264:
+            return AV_CODEC_ID_H264;
+        case NvdecCommon::VideoCodec::Vp9:
+            return AV_CODEC_ID_VP9;
+        default:
+            UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
+            return AV_CODEC_ID_NONE;
+        }
+    }();
+    av_codec = avcodec_find_decoder(codec);
+
+    InitializeAvCodecContext();
+    if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
+        InitializeGpuDecoder();
+    }
+    if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
+        LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
+        avcodec_free_context(&av_codec_ctx);
+        av_buffer_unref(&av_gpu_decoder);
         return;
     }
-    av_codec = avcodec_find_decoder(codec);
-    av_codec_ctx = avcodec_alloc_context3(av_codec);
-    av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
-    InitializeHwdec();
     if (!av_codec_ctx->hw_device_ctx) {
         LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
     }
-    const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
-    if (av_error < 0) {
-        LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
-        avcodec_close(av_codec_ctx);
-        av_buffer_unref(&av_hw_device);
-        return;
-    }
     initialized = true;
 }
 
@@ -133,6 +172,9 @@ void Codec::Decode() {
     if (is_first_frame) {
         Initialize();
     }
+    if (!initialized) {
+        return;
+    }
     bool vp9_hidden_frame = false;
     std::vector<u8> frame_data;
     if (current_codec == NvdecCommon::VideoCodec::H264) {
@@ -141,50 +183,48 @@ void Codec::Decode() {
         frame_data = vp9_decoder->ComposeFrameHeader(state);
         vp9_hidden_frame = vp9_decoder->WasFrameHidden();
     }
-    AVPacket packet{};
-    av_init_packet(&packet);
-    packet.data = frame_data.data();
-    packet.size = static_cast<s32>(frame_data.size());
-    if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
-        LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
+    AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
+    if (!packet) {
+        LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
+        return;
+    }
+    packet->data = frame_data.data();
+    packet->size = static_cast<s32>(frame_data.size());
+    if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
+        LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
         return;
     }
     // Only receive/store visible frames
     if (vp9_hidden_frame) {
         return;
     }
-    AVFrame* hw_frame = av_frame_alloc();
-    AVFrame* sw_frame = hw_frame;
-    ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
-    if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
+    AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
+    AVFramePtr final_frame{nullptr, AVFrameDeleter};
+    ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
+    if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
         LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
-        av_frame_free(&hw_frame);
         return;
     }
-    if (!hw_frame->width || !hw_frame->height) {
+    if (initial_frame->width == 0 || initial_frame->height == 0) {
         LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
-        av_frame_free(&hw_frame);
         return;
     }
-#if defined(LIBVA_FOUND)
-    // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
-    if (hw_frame->format == AV_PIX_FMT_VAAPI) {
-        sw_frame = av_frame_alloc();
-        ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
+    if (av_codec_ctx->hw_device_ctx) {
+        final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
+        ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
         // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
         // because Intel drivers crash unless using AV_PIX_FMT_NV12
-        sw_frame->format = AV_PIX_FMT_NV12;
-        const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
-        ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
-        av_frame_free(&hw_frame);
+        final_frame->format = PREFERRED_GPU_FMT;
+        const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
+        ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
+    } else {
+        final_frame = std::move(initial_frame);
     }
-#endif
-    if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
-        UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
-        av_frame_free(&sw_frame);
+    if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
+        UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
         return;
     }
-    av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
+    av_frames.push(std::move(final_frame));
     if (av_frames.size() > 10) {
         LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
         av_frames.pop();
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 71936203f..f9a80886f 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <memory>
+#include <string_view>
 #include <queue>
 #include "common/common_types.h"
 #include "video_core/command_classes/nvdec_common.h"
@@ -50,18 +51,23 @@ public:
 
     /// Returns the value of current_codec
     [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+
     /// Return name of the current codec
     [[nodiscard]] std::string_view GetCurrentCodecName() const;
 
 private:
-    void InitializeHwdec();
+    void InitializeAvCodecContext();
+
+    void InitializeGpuDecoder();
+
+    bool CreateGpuAvDevice();
 
     bool initialized{};
     NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
 
     AVCodec* av_codec{nullptr};
-    AVBufferRef* av_hw_device{nullptr};
     AVCodecContext* av_codec_ctx{nullptr};
+    AVBufferRef* av_gpu_decoder{nullptr};
 
     GPU& gpu;
     const NvdecCommon::NvdecRegisters& state;
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index 5fb6d45ee..51ee14c13 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister
     const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
                            (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
 
-    writer.WriteUe(16);
+    // TODO (ameerj): Where do we get this number, it seems to be particular for each stream
+    writer.WriteUe(6); // Max number of reference frames
     writer.WriteBit(false);
     writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
     writer.WriteUe(pic_height - 1);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1aa43523a..7f4ca6282 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -475,10 +475,10 @@ public:
 
                 // These values are used by Nouveau and some games.
                 AddGL = 0x8006,
-                SubtractGL = 0x8007,
-                ReverseSubtractGL = 0x8008,
-                MinGL = 0x800a,
-                MaxGL = 0x800b
+                MinGL = 0x8007,
+                MaxGL = 0x8008,
+                SubtractGL = 0x800a,
+                ReverseSubtractGL = 0x800b
             };
 
             enum class Factor : u32 {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index c60ed6453..dce00e829 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7c9b0d6db..9ff0a28cd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
         blit_screen.Recreate();
     }
     const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
-    scheduler.Flush(render_semaphore);
+    const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
+    scheduler.Flush(render_semaphore, present_semaphore);
     scheduler.WaitWorker();
     swapchain.Present(render_semaphore);
 
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index cb0580182..888bc7392 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -358,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() {
 void VKBlitScreen::CreateRenderPass() {
     const VkAttachmentDescription color_attachment{
         .flags = 0,
-        .format = swapchain.GetImageFormat(),
+        .format = swapchain.GetImageViewFormat(),
         .samples = VK_SAMPLE_COUNT_1_BIT,
         .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
         .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index 8e77e4796..d87da2a34 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <mutex>
 #include <span>
 #include <vector>
@@ -18,7 +19,6 @@ namespace Vulkan {
 // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
 constexpr size_t SETS_GROW_RATE = 16;
 constexpr s32 SCORE_THRESHOLD = 3;
-constexpr u32 SETS_PER_POOL = 64;
 
 struct DescriptorBank {
     DescriptorBankInfo info;
@@ -58,11 +58,12 @@ static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
 static void AllocatePool(const Device& device, DescriptorBank& bank) {
     std::array<VkDescriptorPoolSize, 6> pool_sizes;
     size_t pool_cursor{};
+    const u32 sets_per_pool = device.GetSetsPerPool();
     const auto add = [&](VkDescriptorType type, u32 count) {
         if (count > 0) {
             pool_sizes[pool_cursor++] = {
                 .type = type,
-                .descriptorCount = count * SETS_PER_POOL,
+                .descriptorCount = count * sets_per_pool,
             };
         }
     };
@@ -77,7 +78,7 @@ static void AllocatePool(const Device& device, DescriptorBank& bank) {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
         .pNext = nullptr,
         .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
-        .maxSets = SETS_PER_POOL,
+        .maxSets = sets_per_pool,
         .poolSizeCount = static_cast<u32>(pool_cursor),
         .pPoolSizes = std::data(pool_sizes),
     }));
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 3ac18ea54..3bcd6d6cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -228,9 +228,7 @@ void RasterizerVulkan::Clear() {
     };
 
     const u32 color_attachment = regs.clear_buffers.RT;
-    const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask;
-    const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
-    if (use_color && is_color_rt) {
+    if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
         VkClearValue clear_value;
         std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
 
@@ -248,12 +246,15 @@ void RasterizerVulkan::Clear() {
         return;
     }
     VkImageAspectFlags aspect_flags = 0;
-    if (use_depth) {
+    if (use_depth && framebuffer->HasAspectDepthBit()) {
         aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
     }
-    if (use_stencil) {
+    if (use_stencil && framebuffer->HasAspectStencilBit()) {
         aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
     }
+    if (aspect_flags == 0) {
+        return;
+    }
     scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
                       clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
         VkClearAttachment attachment;
@@ -764,12 +765,7 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
     const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass;
     const Maxwell::ComparisonOp compare = regs.stencil_front_func_func;
     if (regs.stencil_two_side_enable) {
-        scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
-            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
-                                   MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
-                                   MaxwellToVK::ComparisonOp(compare));
-        });
-    } else {
+        // Separate stencil op per face
         const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail;
         const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail;
         const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass;
@@ -784,6 +780,13 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
                                    MaxwellToVK::StencilOp(back_zfail),
                                    MaxwellToVK::ComparisonOp(back_compare));
         });
+    } else {
+        // Front face defines the stencil op of both faces
+        scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
+            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
+                                   MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
+                                   MaxwellToVK::ComparisonOp(compare));
+        });
     }
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 4840962de..1d438787a 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() {
     worker_thread.join();
 }
 
-void VKScheduler::Flush(VkSemaphore semaphore) {
-    SubmitExecution(semaphore);
+void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+    SubmitExecution(signal_semaphore, wait_semaphore);
     AllocateNewContext();
 }
 
-void VKScheduler::Finish(VkSemaphore semaphore) {
+void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
     const u64 presubmit_tick = CurrentTick();
-    SubmitExecution(semaphore);
+    SubmitExecution(signal_semaphore, wait_semaphore);
     WaitWorker();
     Wait(presubmit_tick);
     AllocateNewContext();
@@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() {
     });
 }
 
-void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
+void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
     EndPendingOperations();
     InvalidateState();
 
     const u64 signal_value = master_semaphore->NextTick();
-    Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
+    Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
         cmdbuf.End();
-
-        const u32 num_signal_semaphores = semaphore ? 2U : 1U;
-
-        const u64 wait_value = signal_value - 1;
-        const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-
         const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+
+        const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
         const std::array signal_values{signal_value, u64(0)};
-        const std::array signal_semaphores{timeline_semaphore, semaphore};
+        const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
+
+        const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
+        const std::array wait_values{signal_value - 1, u64(1)};
+        const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
+        static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
+            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+        };
 
         const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
             .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
             .pNext = nullptr,
-            .waitSemaphoreValueCount = 1,
-            .pWaitSemaphoreValues = &wait_value,
+            .waitSemaphoreValueCount = num_wait_semaphores,
+            .pWaitSemaphoreValues = wait_values.data(),
             .signalSemaphoreValueCount = num_signal_semaphores,
             .pSignalSemaphoreValues = signal_values.data(),
         };
         const VkSubmitInfo submit_info{
             .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
             .pNext = &timeline_si,
-            .waitSemaphoreCount = 1,
-            .pWaitSemaphores = &timeline_semaphore,
-            .pWaitDstStageMask = &wait_stage_mask,
+            .waitSemaphoreCount = num_wait_semaphores,
+            .pWaitSemaphores = wait_semaphores.data(),
+            .pWaitDstStageMask = wait_stage_masks.data(),
             .commandBufferCount = 1,
             .pCommandBuffers = cmdbuf.address(),
             .signalSemaphoreCount = num_signal_semaphores,
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index cf39a2363..759ed5a48 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -34,10 +34,10 @@ public:
     ~VKScheduler();
 
     /// Sends the current execution context to the GPU.
-    void Flush(VkSemaphore semaphore = nullptr);
+    void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
 
     /// Sends the current execution context to the GPU and waits for it to complete.
-    void Finish(VkSemaphore semaphore = nullptr);
+    void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
 
     /// Waits for the worker thread to finish executing everything. After this function returns it's
     /// safe to touch worker resources.
@@ -191,7 +191,7 @@ private:
 
     void AllocateWorkerCommandBuffer();
 
-    void SubmitExecution(VkSemaphore semaphore);
+    void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
 
     void AllocateNewContext();
 
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 5f78f6950..d90935f52 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -110,10 +110,6 @@ public:
         return Exchange(Dirty::DepthTestEnable, false);
     }
 
-    bool TouchDepthBoundsEnable() {
-        return Exchange(Dirty::DepthBoundsEnable, false);
-    }
-
     bool TouchDepthWriteEnable() {
         return Exchange(Dirty::DepthWriteEnable, false);
     }
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index d990eefba..aadf03cb0 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -20,16 +20,15 @@ namespace Vulkan {
 
 namespace {
 
-VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) {
+VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {
     if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
         VkSurfaceFormatKHR format;
         format.format = VK_FORMAT_B8G8R8A8_UNORM;
         format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
         return format;
     }
-    const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
-        const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
-        return format.format == request_format &&
+    const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
+        return format.format == VK_FORMAT_B8G8R8A8_UNORM &&
                format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
     });
     return found != formats.end() ? *found : formats[0];
@@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() {
 }
 
 void VKSwapchain::Present(VkSemaphore render_semaphore) {
-    const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
-    const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
     const auto present_queue{device.GetPresentQueue()};
     const VkPresentInfoKHR present_info{
         .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
         .pNext = nullptr,
-        .waitSemaphoreCount = render_semaphore ? 2U : 1U,
-        .pWaitSemaphores = semaphores.data(),
+        .waitSemaphoreCount = render_semaphore ? 1U : 0U,
+        .pWaitSemaphores = &render_semaphore,
         .swapchainCount = 1,
         .pSwapchains = swapchain.address(),
         .pImageIndices = &image_index,
@@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
     const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
     const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
 
-    const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
+    const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
     const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
 
     u32 requested_image_count{capabilities.minImageCount + 1};
@@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
         swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
         swapchain_ci.pQueueFamilyIndices = queue_indices.data();
     }
+    static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
+    VkImageFormatListCreateInfo format_list{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
+        .pNext = nullptr,
+        .viewFormatCount = static_cast<u32>(view_formats.size()),
+        .pViewFormats = view_formats.data(),
+    };
+    if (device.IsKhrSwapchainMutableFormatEnabled()) {
+        format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list);
+        swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR;
+    }
     // Request the size again to reduce the possibility of a TOCTOU race condition.
     const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
     swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
 
     images = swapchain.GetImages();
     image_count = static_cast<u32>(images.size());
-    image_format = surface_format.format;
+    image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
 }
 
 void VKSwapchain::CreateSemaphores() {
@@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() {
         .flags = 0,
         .image = {},
         .viewType = VK_IMAGE_VIEW_TYPE_2D,
-        .format = image_format,
+        .format = image_view_format,
         .components =
             {
                 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 35c2cdc14..5bce41e21 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -68,8 +68,12 @@ public:
         return *image_views[index];
     }
 
-    VkFormat GetImageFormat() const {
-        return image_format;
+    VkFormat GetImageViewFormat() const {
+        return image_view_format;
+    }
+
+    VkSemaphore CurrentPresentSemaphore() const {
+        return *present_semaphores[frame_index];
     }
 
 private:
@@ -96,7 +100,7 @@ private:
     u32 image_index{};
     u32 frame_index{};
 
-    VkFormat image_format{};
+    VkFormat image_view_format{};
     VkExtent2D extent{};
 
     bool current_srgb{};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8f4df7122..ff979a7ac 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1186,9 +1186,12 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
         renderpass_key.depth_format = depth_buffer->format;
         num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
         images[num_images] = depth_buffer->ImageHandle();
-        image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
+        const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer);
+        image_ranges[num_images] = subresource_range;
         samples = depth_buffer->Samples();
         ++num_images;
+        has_depth = (subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
+        has_stencil = (subresource_range.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
     } else {
         renderpass_key.depth_format = PixelFormat::Invalid;
     }
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 5fe6b7ba3..6d5a68bfe 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -232,6 +232,18 @@ public:
         return image_ranges;
     }
 
+    [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
+        return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
+    }
+
+    [[nodiscard]] bool HasAspectDepthBit() const noexcept {
+        return has_depth;
+    }
+
+    [[nodiscard]] bool HasAspectStencilBit() const noexcept {
+        return has_stencil;
+    }
+
 private:
     vk::Framebuffer framebuffer;
     VkRenderPass renderpass{};
@@ -241,6 +253,8 @@ private:
     u32 num_images = 0;
     std::array<VkImage, 9> images{};
     std::array<VkImageSubresourceRange, 9> image_ranges{};
+    bool has_depth{};
+    bool has_stencil{};
 };
 
 struct TextureCacheParams {
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 8a4581c19..81a878bb2 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <filesystem>
 #include <fstream>
 #include <memory>
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 6180b8c0e..74cd3c9d8 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <algorithm>
 #include <array>
 #include <bit>
 #include <concepts>
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 3b575db4d..cae543a51 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -37,7 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
 namespace VideoCore {
 
 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
-    const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
+    const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
+    const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
     const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
     auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
     auto context = emu_window.CreateSharedContext();
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index c7f0d26ce..2caf98c7c 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -611,6 +611,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
 
     graphics_queue = logical.GetQueue(graphics_family);
     present_queue = logical.GetQueue(present_family);
+
+    sets_per_pool = 64;
+    if (driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE) {
+        // AMD drivers need a higher amount of Sets per Pool in certain circunstances like in XC2.
+        sets_per_pool = 96;
+    }
 }
 
 Device::~Device() = default;
@@ -851,6 +857,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
     bool has_khr_shader_float16_int8{};
     bool has_khr_workgroup_memory_explicit_layout{};
     bool has_khr_pipeline_executable_properties{};
+    bool has_khr_image_format_list{};
+    bool has_khr_swapchain_mutable_format{};
     bool has_ext_subgroup_size_control{};
     bool has_ext_transform_feedback{};
     bool has_ext_custom_border_color{};
@@ -900,6 +908,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
         test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
         test(has_khr_workgroup_memory_explicit_layout,
              VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
+        test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false);
+        test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME,
+             false);
         test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
         if (Settings::values.enable_nsight_aftermath) {
             test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
@@ -1078,6 +1089,11 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
             khr_pipeline_executable_properties = true;
         }
     }
+    if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
+        extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
+        extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
+        khr_swapchain_mutable_format = true;
+    }
     if (khr_push_descriptor) {
         VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
         push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 234d74129..bc180a32a 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -224,6 +224,11 @@ public:
         return khr_pipeline_executable_properties;
     }
 
+    /// Returns true if VK_KHR_swapchain_mutable_format is enabled.
+    bool IsKhrSwapchainMutableFormatEnabled() const {
+        return khr_swapchain_mutable_format;
+    }
+
     /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
     bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
         return khr_workgroup_memory_explicit_layout;
@@ -318,6 +323,10 @@ public:
         return device_access_memory;
     }
 
+    u32 GetSetsPerPool() const {
+        return sets_per_pool;
+    }
+
 private:
     /// Checks if the physical device is suitable.
     void CheckSuitability(bool requires_swapchain) const;
@@ -371,6 +380,7 @@ private:
     VkShaderStageFlags guest_warp_stages{};     ///< Stages where the guest warp size can be forced.
     u64 device_access_memory{};                 ///< Total size of device local memory in bytes.
     u32 max_push_descriptors{};                 ///< Maximum number of push descriptors
+    u32 sets_per_pool{};                        ///< Sets per Description Pool
     bool is_optimal_astc_supported{};           ///< Support for native ASTC.
     bool is_float16_supported{};                ///< Support for float16 arithmetic.
     bool is_int8_supported{};                   ///< Support for int8 arithmetic.
@@ -390,6 +400,7 @@ private:
     bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
     bool khr_push_descriptor{};                  ///< Support for VK_KHR_push_descritor.
     bool khr_pipeline_executable_properties{};   ///< Support for executable properties.
+    bool khr_swapchain_mutable_format{};         ///< Support for VK_KHR_swapchain_mutable_format.
     bool ext_index_type_uint8{};                 ///< Support for VK_EXT_index_type_uint8.
     bool ext_sampler_filter_minmax{};            ///< Support for VK_EXT_sampler_filter_minmax.
     bool ext_depth_range_unrestricted{};         ///< Support for VK_EXT_depth_range_unrestricted.
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 85d292bcc..8744d8e5d 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -812,7 +812,7 @@ void Config::ReadRendererValues() {
     ReadGlobalSetting(Settings::values.use_disk_shader_cache);
     ReadGlobalSetting(Settings::values.gpu_accuracy);
     ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
-    ReadGlobalSetting(Settings::values.use_nvdec_emulation);
+    ReadGlobalSetting(Settings::values.nvdec_emulation);
     ReadGlobalSetting(Settings::values.accelerate_astc);
     ReadGlobalSetting(Settings::values.use_vsync);
     ReadGlobalSetting(Settings::values.shader_backend);
@@ -1349,7 +1349,10 @@ void Config::SaveRendererValues() {
                  static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()),
                  Settings::values.gpu_accuracy.UsingGlobal());
     WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
-    WriteGlobalSetting(Settings::values.use_nvdec_emulation);
+    WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()),
+                 static_cast<u32>(Settings::values.nvdec_emulation.GetValue(global)),
+                 static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()),
+                 Settings::values.nvdec_emulation.UsingGlobal());
     WriteGlobalSetting(Settings::values.accelerate_astc);
     WriteGlobalSetting(Settings::values.use_vsync);
     WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 9555f4498..4733227b6 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -182,5 +182,6 @@ private:
 Q_DECLARE_METATYPE(Settings::CPUAccuracy);
 Q_DECLARE_METATYPE(Settings::GPUAccuracy);
 Q_DECLARE_METATYPE(Settings::FullscreenMode);
+Q_DECLARE_METATYPE(Settings::NvdecEmulation);
 Q_DECLARE_METATYPE(Settings::RendererBackend);
 Q_DECLARE_METATYPE(Settings::ShaderBackend);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 37e896258..c594164be 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -88,24 +88,30 @@ void ConfigureGraphics::SetConfiguration() {
     ui->api_widget->setEnabled(runtime_lock);
     ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
     ui->use_disk_shader_cache->setEnabled(runtime_lock);
-    ui->use_nvdec_emulation->setEnabled(runtime_lock);
+    ui->nvdec_emulation_widget->setEnabled(runtime_lock);
     ui->accelerate_astc->setEnabled(runtime_lock);
     ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
     ui->use_asynchronous_gpu_emulation->setChecked(
         Settings::values.use_asynchronous_gpu_emulation.GetValue());
-    ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue());
     ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue());
 
     if (Settings::IsConfiguringGlobal()) {
         ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue()));
         ui->fullscreen_mode_combobox->setCurrentIndex(
             static_cast<int>(Settings::values.fullscreen_mode.GetValue()));
+        ui->nvdec_emulation->setCurrentIndex(
+            static_cast<int>(Settings::values.nvdec_emulation.GetValue()));
         ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue());
     } else {
         ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend);
         ConfigurationShared::SetHighlight(ui->api_widget,
                                           !Settings::values.renderer_backend.UsingGlobal());
 
+        ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation,
+                                               &Settings::values.nvdec_emulation);
+        ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget,
+                                          !Settings::values.nvdec_emulation.UsingGlobal());
+
         ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox,
                                                &Settings::values.fullscreen_mode);
         ConfigurationShared::SetHighlight(ui->fullscreen_mode_label,
@@ -137,8 +143,6 @@ void ConfigureGraphics::ApplyConfiguration() {
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation,
                                              ui->use_asynchronous_gpu_emulation,
                                              use_asynchronous_gpu_emulation);
-    ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation,
-                                             ui->use_nvdec_emulation, use_nvdec_emulation);
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc,
                                              accelerate_astc);
 
@@ -147,6 +151,9 @@ void ConfigureGraphics::ApplyConfiguration() {
         if (Settings::values.renderer_backend.UsingGlobal()) {
             Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
         }
+        if (Settings::values.nvdec_emulation.UsingGlobal()) {
+            Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
+        }
         if (Settings::values.shader_backend.UsingGlobal()) {
             Settings::values.shader_backend.SetValue(shader_backend);
         }
@@ -180,6 +187,13 @@ void ConfigureGraphics::ApplyConfiguration() {
             }
         }
 
+        if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
+            Settings::values.nvdec_emulation.SetGlobal(true);
+        } else {
+            Settings::values.nvdec_emulation.SetGlobal(false);
+            Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
+        }
+
         if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
             Settings::values.bg_red.SetGlobal(true);
             Settings::values.bg_green.SetGlobal(true);
@@ -278,6 +292,20 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
                                                   ConfigurationShared::USE_GLOBAL_OFFSET);
 }
 
+Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const {
+    if (Settings::IsConfiguringGlobal()) {
+        return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex());
+    }
+
+    if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
+        Settings::values.nvdec_emulation.SetGlobal(true);
+        return Settings::values.nvdec_emulation.GetValue();
+    }
+    Settings::values.nvdec_emulation.SetGlobal(false);
+    return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex() -
+                                                 ConfigurationShared::USE_GLOBAL_OFFSET);
+}
+
 void ConfigureGraphics::SetupPerGameUI() {
     if (Settings::IsConfiguringGlobal()) {
         ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal());
@@ -286,7 +314,7 @@ void ConfigureGraphics::SetupPerGameUI() {
         ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal());
         ui->use_asynchronous_gpu_emulation->setEnabled(
             Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
-        ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal());
+        ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal());
         ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal());
         ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal());
         ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal());
@@ -301,8 +329,6 @@ void ConfigureGraphics::SetupPerGameUI() {
 
     ConfigurationShared::SetColoredTristate(
         ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache);
-    ConfigurationShared::SetColoredTristate(
-        ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation);
     ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc,
                                             accelerate_astc);
     ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation,
@@ -316,4 +342,6 @@ void ConfigureGraphics::SetupPerGameUI() {
         static_cast<int>(Settings::values.fullscreen_mode.GetValue(true)));
     ConfigurationShared::InsertGlobalItem(
         ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true)));
+    ConfigurationShared::InsertGlobalItem(
+        ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true)));
 }
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index c866b911b..7d7ac329d 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -43,6 +43,7 @@ private:
     void SetupPerGameUI();
 
     Settings::RendererBackend GetCurrentGraphicsBackend() const;
+    Settings::NvdecEmulation GetCurrentNvdecEmulation() const;
 
     std::unique_ptr<Ui::ConfigureGraphics> ui;
     QColor bg_color;
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 099ddbb7c..1a12cfa4d 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -156,7 +156,7 @@
         <item>
          <widget class="QCheckBox" name="use_disk_shader_cache">
           <property name="text">
-           <string>Use disk shader cache</string>
+           <string>Use disk pipeline cache</string>
           </property>
          </widget>
         </item>
@@ -168,13 +168,6 @@
          </widget>
         </item>
         <item>
-         <widget class="QCheckBox" name="use_nvdec_emulation">
-          <property name="text">
-           <string>Use NVDEC emulation</string>
-          </property>
-         </widget>
-        </item>
-        <item>
           <widget class="QCheckBox" name="accelerate_astc">
             <property name="text">
               <string>Accelerate ASTC texture decoding</string>
@@ -182,6 +175,50 @@
           </widget>
         </item>
         <item>
+         <widget class="QWidget" name="nvdec_emulation_widget" native="true">
+          <layout class="QHBoxLayout" name="nvdec_emulation_layout">
+           <property name="leftMargin">
+            <number>0</number>
+           </property>
+           <property name="topMargin">
+            <number>0</number>
+           </property>
+           <property name="rightMargin">
+            <number>0</number>
+           </property>
+           <property name="bottomMargin">
+            <number>0</number>
+           </property>
+           <item>
+            <widget class="QLabel" name="nvdec_emulation_label">
+             <property name="text">
+              <string>NVDEC emulation:</string>
+             </property>
+            </widget>
+           </item>
+           <item>
+            <widget class="QComboBox" name="nvdec_emulation">
+             <item>
+              <property name="text">
+               <string>Disabled</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>CPU Decoding</string>
+              </property>
+             </item>
+             <item>
+              <property name="text">
+               <string>GPU Decoding</string>
+              </property>
+             </item>
+            </widget>
+           </item>
+          </layout>
+         </widget>
+        </item>
+        <item>
          <widget class="QWidget" name="fullscreen_mode_layout" native="true">
           <layout class="QHBoxLayout" name="horizontalLayout_1">
            <property name="leftMargin">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 5891f8299..b91abc2f0 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -82,7 +82,7 @@
            <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
           </property>
           <property name="text">
-           <string>Use asynchronous shader building (hack)</string>
+           <string>Use asynchronous shader building (Hack)</string>
           </property>
          </widget>
         </item>
@@ -92,7 +92,7 @@
             <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
           </property>
           <property name="text">
-           <string>Use Fast GPU Time (hack)</string>
+           <string>Use Fast GPU Time (Hack)</string>
           </property>
          </widget>
         </item>
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index e97804220..f9d949e75 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
     QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
     QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location"));
     QAction* open_transferable_shader_cache =
-        context_menu.addAction(tr("Open Transferable Shader Cache"));
+        context_menu.addAction(tr("Open Transferable Pipeline Cache"));
     context_menu.addSeparator();
     QMenu* remove_menu = context_menu.addMenu(tr("Remove"));
     QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update"));
     QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC"));
     QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
-    QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache"));
-    QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache"));
+    QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache"));
+    QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache"));
     remove_menu->addSeparator();
-    QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches"));
+    QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches"));
     QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
     QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
     QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index e36774cc6..77d53e7bc 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -3174,12 +3174,11 @@ std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProv
 }
 
 bool GMainWindow::ConfirmClose() {
-    if (emu_thread == nullptr || !UISettings::values.confirm_before_closing)
+    if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) {
         return true;
-
-    QMessageBox::StandardButton answer =
-        QMessageBox::question(this, tr("yuzu"), tr("Are you sure you want to close yuzu?"),
-                              QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
+    }
+    const auto text = tr("Are you sure you want to close yuzu?");
+    const auto answer = QMessageBox::question(this, tr("yuzu"), text);
     return answer != QMessageBox::No;
 }
 
@@ -3261,14 +3260,13 @@ bool GMainWindow::ConfirmChangeGame() {
 }
 
 bool GMainWindow::ConfirmForceLockedExit() {
-    if (emu_thread == nullptr)
+    if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) {
         return true;
+    }
+    const auto text = tr("The currently running application has requested yuzu to not exit.\n\n"
+                         "Would you like to bypass this and exit anyway?");
 
-    const auto answer =
-        QMessageBox::question(this, tr("yuzu"),
-                              tr("The currently running application has requested yuzu to not "
-                                 "exit.\n\nWould you like to bypass this and exit anyway?"),
-                              QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
+    const auto answer = QMessageBox::question(this, tr("yuzu"), text);
     return answer != QMessageBox::No;
 }
 
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 757dd1ea0..891f7be6f 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -465,7 +465,7 @@ void Config::ReadValues() {
     ReadSetting("Renderer", Settings::values.disable_fps_limit);
     ReadSetting("Renderer", Settings::values.shader_backend);
     ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
-    ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
+    ReadSetting("Renderer", Settings::values.nvdec_emulation);
     ReadSetting("Renderer", Settings::values.accelerate_astc);
     ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
 
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index e02eceb99..72f3213fb 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -261,9 +261,9 @@ shader_backend =
 # 0 (default): Off, 1: On
 use_asynchronous_shaders =
 
-# Enable NVDEC emulation.
-# 0: Off, 1 (default): On
-use_nvdec_emulation =
+# NVDEC emulation.
+# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding
+nvdec_emulation =
 
 # Accelerate ASTC texture decoding.
 # 0: Off, 1 (default): On