94 files changed, 1239 insertions, 941 deletions
diff --git a/.travis.yml b/.travis.yml
index 9512f7843..93fda1dfa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
     - os: osx
       env: NAME="macos build"
       sudo: false
-      osx_image: xcode10.1
+      osx_image: xcode10.2
       install: "./.travis/macos/deps.sh"
       script: "./.travis/macos/build.sh"
       after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/linux-mingw/docker.sh b/.travis/linux-mingw/docker.sh
index b73a28693..28033acfb 100755
--- a/.travis/linux-mingw/docker.sh
+++ b/.travis/linux-mingw/docker.sh
@@ -13,8 +13,8 @@ echo '' >> /bin/cmd
 chmod +x /bin/cmd
 
 mkdir build && cd build
-cmake .. -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
-make -j4
+cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
+ninja
 
 # Clean up the dirty hacks
 rm /bin/uname && mv /bin/uname1 /bin/uname
diff --git a/.travis/linux/docker.sh b/.travis/linux/docker.sh
index a21a8f9e2..3a9970384 100755
--- a/.travis/linux/docker.sh
+++ b/.travis/linux/docker.sh
@@ -3,7 +3,7 @@
 cd /yuzu
 
 mkdir build && cd build
-cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja
+cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
 ninja
 
 ccache -s
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index b7b4c6f8c..0abd1a93a 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -7,6 +7,7 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn
 export PATH="/usr/local/opt/ccache/libexec:$PATH"
 
+# TODO: Build using ninja instead of make
 mkdir build && cd build
 cmake --version
 cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6a417017c..bfa104034 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -132,7 +132,7 @@ find_package(Threads REQUIRED)
 if (ENABLE_SDL2)
     if (YUZU_USE_BUNDLED_SDL2)
         # Detect toolchain and platform
-        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
+        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
             set(SDL2_VER "SDL2-2.0.8")
         else()
             message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
@@ -165,7 +165,7 @@ if (YUZU_USE_BUNDLED_UNICORN)
     if (MSVC)
         message(STATUS "unicorn not found, falling back to bundled")
         # Detect toolchain and platform
-        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
+        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
             set(UNICORN_VER "unicorn-yuzu")
         else()
             message(FATAL_ERROR "No bundled Unicorn binaries for your toolchain. Disable YUZU_USE_BUNDLED_UNICORN and provide your own.")
@@ -233,7 +233,7 @@ endif()
 
 if (ENABLE_QT)
     if (YUZU_USE_BUNDLED_QT)
-        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
+        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
             set(QT_VER qt-5.12.0-msvc2017_64)
         else()
             message(FATAL_ERROR "No bundled Qt binaries for your toolchain. Disable YUZU_USE_BUNDLED_QT and provide your own.")
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 3f8b6cda8..e6fa11a03 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -7,6 +7,10 @@ include(DownloadExternals)
 add_library(catch-single-include INTERFACE)
 target_include_directories(catch-single-include INTERFACE catch/single_include)
 
+# libfmt
+add_subdirectory(fmt)
+add_library(fmt::fmt ALIAS fmt)
+
 # Dynarmic
 if (ARCHITECTURE_x86_64)
     set(DYNARMIC_TESTS OFF)
@@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64)
     add_subdirectory(dynarmic)
 endif()
 
-# libfmt
-add_subdirectory(fmt)
-add_library(fmt::fmt ALIAS fmt)
-
 # getopt
 if (MSVC)
     add_subdirectory(getopt)
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject 4e6848d1c9e8dadc70595c15b5589f8b14aad47
+Subproject 2683a9a3e316b5c3f387bbe6787732b9ff44b8d
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9aea4af87..04018233f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,15 +21,29 @@ if (MSVC)
     # Ensure that projects build with Unicode support.
     add_definitions(-DUNICODE -D_UNICODE)
 
-    # /W3 - Level 3 warnings
-    # /MP - Multi-threaded compilation
-    # /Zi - Output debugging information
-    # /Zo - enhanced debug info for optimized builds
-    # /permissive- - enables stricter C++ standards conformance checks
-    # /EHsc - C++-only exception handling semantics
-    # /Zc:throwingNew - let codegen assume `operator new` will never return null
-    # /Zc:inline - let codegen omit inline functions in object files
-    add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
+    # /W3                 - Level 3 warnings
+    # /MP                 - Multi-threaded compilation
+    # /Zi                 - Output debugging information
+    # /Zo                 - Enhanced debug info for optimized builds
+    # /permissive-        - Enables stricter C++ standards conformance checks
+    # /EHsc               - C++-only exception handling semantics
+    # /volatile:iso       - Use strict standards-compliant volatile semantics
+    # /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, as the standard mandates
+    # /Zc:inline          - Let codegen omit inline functions in object files
+    # /Zc:throwingNew     - Let codegen assume `operator new` (without std::nothrow) will never return null
+    add_compile_options(
+        /W3
+        /MP
+        /Zi
+        /Zo
+        /permissive-
+        /EHsc
+        /std:c++latest
+        /volatile:iso
+        /Zc:externConstexpr
+        /Zc:inline
+        /Zc:throwingNew
+    )
 
     # /GS- - No stack buffer overflow checks
     add_compile_options("$<$<CONFIG:Release>:/GS->")
@@ -37,7 +51,10 @@ if (MSVC)
     set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
     set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
 else()
-    add_compile_options("-Wno-attributes")
+    add_compile_options(
+        -Wall
+        -Wno-attributes
+    )
 
     if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
         add_compile_options("-stdlib=libc++")
diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp
index 60a35c67c..978526492 100644
--- a/src/common/zstd_compression.cpp
+++ b/src/common/zstd_compression.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#pragma once
-
 #include <algorithm>
 #include <zstd.h>
 
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 1320bbe77..eda466a5d 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -10,6 +10,8 @@
 
 namespace Core::Frontend {
 
+GraphicsContext::~GraphicsContext() = default;
+
 class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
                               public std::enable_shared_from_this<TouchState> {
 public:
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 70a522556..e2c290dc1 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -19,6 +19,8 @@ namespace Core::Frontend {
  */
 class GraphicsContext {
 public:
+    virtual ~GraphicsContext();
+
     /// Makes the graphics context current for the caller thread
     virtual void MakeCurrent() = 0;
 
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index ac0e1d796..5bb139483 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -438,7 +438,7 @@ inline float RequestParser::Pop() {
 template <>
 inline double RequestParser::Pop() {
     const u64 value = Pop<u64>();
-    float real;
+    double real;
     std::memcpy(&real, &value, sizeof(real));
     return real;
 }
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index fe710eb6e..42d9dd844 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -58,7 +58,7 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
     auto& kernel = Core::System::GetInstance().Kernel();
     if (!writable_event) {
         // Create event if not provided
-        const auto pair = WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+        const auto pair = WritableEvent::CreateEventPair(kernel, ResetType::Automatic,
                                                          "HLE Pause Event: " + reason);
         writable_event = pair.writable;
     }
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index 332876c27..2821176a7 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -33,8 +33,8 @@ enum class HandleType : u32 {
 };
 
 enum class ResetType {
-    OneShot, ///< Reset automatically on object acquisition
-    Sticky,  ///< Never reset automatically
+    Automatic, ///< Reset automatically on object acquisition
+    Manual,    ///< Never reset automatically
 };
 
 class Object : NonCopyable {
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index c2b798a4e..06463cd26 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -21,8 +21,9 @@ bool ReadableEvent::ShouldWait(const Thread* thread) const {
 void ReadableEvent::Acquire(Thread* thread) {
     ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 
-    if (reset_type == ResetType::OneShot)
+    if (reset_type == ResetType::Automatic) {
         signaled = false;
+    }
 }
 
 void ReadableEvent::Signal() {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 2dcf174c5..5a5851f66 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1255,8 +1255,8 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
     return vm_manager.MapCodeMemory(dst_address, src_address, size);
 }
 
-ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
-                                  u64 src_address, u64 size) {
+static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle,
+                                         u64 dst_address, u64 src_address, u64 size) {
     LOG_DEBUG(Kernel_SVC,
               "called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, "
               "size=0x{:016X}",
@@ -1342,7 +1342,7 @@ static void ExitProcess(Core::System& system) {
 /// Creates a new thread
 static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
                                VAddr stack_top, u32 priority, s32 processor_id) {
-    LOG_TRACE(Kernel_SVC,
+    LOG_DEBUG(Kernel_SVC,
               "called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
               "threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
               entry_point, arg, stack_top, priority, processor_id, *out_handle);
@@ -1402,7 +1402,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
 
 /// Starts the thread for the provided handle
 static ResultCode StartThread(Core::System& system, Handle thread_handle) {
-    LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
+    LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
 
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
@@ -1425,7 +1425,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
 
 /// Called when a thread exits
 static void ExitThread(Core::System& system) {
-    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
+    LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
 
     auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
     current_thread->Stop();
@@ -1435,7 +1435,7 @@ static void ExitThread(Core::System& system) {
 
 /// Sleep the current thread
 static void SleepThread(Core::System& system, s64 nanoseconds) {
-    LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
+    LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
 
     enum class SleepType : s64 {
         YieldWithoutLoadBalancing = 0,
@@ -1880,52 +1880,59 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
 }
 
 static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
-                                    u64 mask) {
-    LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
-              mask, core);
+                                    u64 affinity_mask) {
+    LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
+              thread_handle, core, affinity_mask);
 
-    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
-    const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
-    if (!thread) {
-        LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
-                  thread_handle);
-        return ERR_INVALID_HANDLE;
-    }
+    const auto* const current_process = system.Kernel().CurrentProcess();
 
     if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
-        const u8 ideal_cpu_core = thread->GetOwnerProcess()->GetIdealCore();
+        const u8 ideal_cpu_core = current_process->GetIdealCore();
 
         ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
 
         // Set the target CPU to the ideal core specified by the process.
         core = ideal_cpu_core;
-        mask = 1ULL << core;
-    }
-
-    if (mask == 0) {
-        LOG_ERROR(Kernel_SVC, "Mask is 0");
-        return ERR_INVALID_COMBINATION;
-    }
+        affinity_mask = 1ULL << core;
+    } else {
+        const u64 core_mask = current_process->GetCoreMask();
+
+        if ((core_mask | affinity_mask) != core_mask) {
+            LOG_ERROR(
+                Kernel_SVC,
+                "Invalid processor ID specified (core_mask=0x{:08X}, affinity_mask=0x{:016X})",
+                core_mask, affinity_mask);
+            return ERR_INVALID_PROCESSOR_ID;
+        }
 
-    /// This value is used to only change the affinity mask without changing the current ideal core.
-    static constexpr u32 OnlyChangeMask = static_cast<u32>(-3);
+        if (affinity_mask == 0) {
+            LOG_ERROR(Kernel_SVC, "Specified affinity mask is zero.");
+            return ERR_INVALID_COMBINATION;
+        }
 
-    if (core == OnlyChangeMask) {
-        core = thread->GetIdealCore();
-    } else if (core >= Core::NUM_CPU_CORES && core != static_cast<u32>(-1)) {
-        LOG_ERROR(Kernel_SVC, "Invalid core specified, got {}", core);
-        return ERR_INVALID_PROCESSOR_ID;
+        if (core < Core::NUM_CPU_CORES) {
+            if ((affinity_mask & (1ULL << core)) == 0) {
+                LOG_ERROR(Kernel_SVC,
+                          "Core is not enabled for the current mask, core={}, mask={:016X}", core,
+                          affinity_mask);
+                return ERR_INVALID_COMBINATION;
+            }
+        } else if (core != static_cast<u32>(THREADPROCESSORID_DONT_CARE) &&
+                   core != static_cast<u32>(THREADPROCESSORID_DONT_UPDATE)) {
+            LOG_ERROR(Kernel_SVC, "Invalid processor ID specified (core={}).", core);
+            return ERR_INVALID_PROCESSOR_ID;
+        }
     }
 
-    // Error out if the input core isn't enabled in the input mask.
-    if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
-        LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}",
-                  core, mask);
-        return ERR_INVALID_COMBINATION;
+    const auto& handle_table = current_process->GetHandleTable();
+    const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
+    if (!thread) {
+        LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
+                  thread_handle);
+        return ERR_INVALID_HANDLE;
     }
 
-    thread->ChangeCore(core, mask);
-
+    thread->ChangeCore(core, affinity_mask);
     return RESULT_SUCCESS;
 }
 
@@ -1980,7 +1987,7 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle
 
     auto& kernel = system.Kernel();
     const auto [readable_event, writable_event] =
-        WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent");
+        WritableEvent::CreateEventPair(kernel, ResetType::Manual, "CreateEvent");
 
     HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
 
@@ -2183,8 +2190,8 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
     return RESULT_SUCCESS;
 }
 
-ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
-                         u32 out_thread_ids_size, Handle debug_handle) {
+static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
+                                u32 out_thread_ids_size, Handle debug_handle) {
     // TODO: Handle this case when debug events are supported.
     UNIMPLEMENTED_IF(debug_handle != InvalidHandle);
 
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index f07332f02..b4b9cda7c 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -30,12 +30,21 @@ enum ThreadPriority : u32 {
 };
 
 enum ThreadProcessorId : s32 {
-    THREADPROCESSORID_IDEAL = -2, ///< Run thread on the ideal core specified by the process.
-    THREADPROCESSORID_0 = 0,      ///< Run thread on core 0
-    THREADPROCESSORID_1 = 1,      ///< Run thread on core 1
-    THREADPROCESSORID_2 = 2,      ///< Run thread on core 2
-    THREADPROCESSORID_3 = 3,      ///< Run thread on core 3
-    THREADPROCESSORID_MAX = 4,    ///< Processor ID must be less than this
+    /// Indicates that no particular processor core is preferred.
+    THREADPROCESSORID_DONT_CARE = -1,
+
+    /// Run thread on the ideal core specified by the process.
+    THREADPROCESSORID_IDEAL = -2,
+
+    /// Indicates that the preferred processor ID shouldn't be updated in
+    /// a core mask setting operation.
+    THREADPROCESSORID_DONT_UPDATE = -3,
+
+    THREADPROCESSORID_0 = 0,   ///< Run thread on core 0
+    THREADPROCESSORID_1 = 1,   ///< Run thread on core 1
+    THREADPROCESSORID_2 = 2,   ///< Run thread on core 2
+    THREADPROCESSORID_3 = 3,   ///< Run thread on core 3
+    THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
 
     /// Allowed CPU mask
     THREADPROCESSORID_DEFAULT_MASK = (1 << THREADPROCESSORID_0) | (1 << THREADPROCESSORID_1) |
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 26a665bfd..1a32a109f 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -276,7 +276,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
     RegisterHandlers(functions);
 
     auto& kernel = Core::System::GetInstance().Kernel();
-    launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
+    launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
                                                               "ISelfController:LaunchableEvent");
 }
 
@@ -442,10 +442,10 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c
 
 AppletMessageQueue::AppletMessageQueue() {
     auto& kernel = Core::System::GetInstance().Kernel();
-    on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
+    on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
                                                             "AMMessageQueue:OnMessageRecieved");
     on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::OneShot, "AMMessageQueue:OperationModeChanged");
+        kernel, Kernel::ResetType::Automatic, "AMMessageQueue:OperationModeChanged");
 }
 
 AppletMessageQueue::~AppletMessageQueue() = default;
@@ -835,6 +835,7 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
+        return;
     }
 
     std::memcpy(backing.buffer.data() + offset, data.data(), data.size());
@@ -857,6 +858,7 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
+        return;
     }
 
     ctx.WriteBuffer(backing.buffer.data() + offset, size);
diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp
index 7f70b10df..e812c66e9 100644
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -26,11 +26,11 @@ namespace Service::AM::Applets {
 AppletDataBroker::AppletDataBroker() {
     auto& kernel = Core::System::GetInstance().Kernel();
     state_changed_event = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:StateChangedEvent");
+        kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
     pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopDataOutEvent");
+        kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopDataOutEvent");
     pop_interactive_out_data_event = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
+        kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
 }
 
 AppletDataBroker::~AppletDataBroker() = default;
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index 51d8c26b4..bd4e38461 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -68,7 +68,7 @@ AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs
     RegisterHandlers(functions);
 
     auto& kernel = Core::System::GetInstance().Kernel();
-    aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
+    aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
                                                               "GetAddOnContentListChanged:Event");
 }
 
diff --git a/src/core/hle/service/audio/audctl.cpp b/src/core/hle/service/audio/audctl.cpp
index f43e512e9..6a01d4d29 100644
--- a/src/core/hle/service/audio/audctl.cpp
+++ b/src/core/hle/service/audio/audctl.cpp
@@ -50,7 +50,7 @@ void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Audio, "called.");
 
     // This service function is currently hardcoded on the
-    // actual console to this value (as of 6.0.0).
+    // actual console to this value (as of 8.0.0).
     constexpr s32 target_min_volume = 0;
 
     IPC::ResponseBuilder rb{ctx, 3};
@@ -62,7 +62,7 @@ void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Audio, "called.");
 
     // This service function is currently hardcoded on the
-    // actual console to this value (as of 6.0.0).
+    // actual console to this value (as of 8.0.0).
     constexpr s32 target_max_volume = 15;
 
     IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 12875fb42..6ba41b20a 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -67,7 +67,7 @@ public:
         // This is the event handle used to check if the audio buffer was released
         auto& system = Core::System::GetInstance();
         buffer_event = Kernel::WritableEvent::CreateEventPair(
-            system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
+            system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
 
         stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
                                        audio_params.channel_count, std::move(unique_name),
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 1dde6edb7..75db0c2dc 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -8,6 +8,7 @@
 
 #include "audio_core/audio_renderer.h"
 #include "common/alignment.h"
+#include "common/bit_util.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
 #include "common/string_util.h"
@@ -46,7 +47,7 @@ public:
 
         auto& system = Core::System::GetInstance();
         system_event = Kernel::WritableEvent::CreateEventPair(
-            system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
+            system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
         renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
                                                               system_event.writable);
     }
@@ -178,7 +179,7 @@ public:
         RegisterHandlers(functions);
 
         auto& kernel = Core::System::GetInstance().Kernel();
-        buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+        buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
                                                               "IAudioOutBufferReleasedEvent");
     }
 
@@ -262,64 +263,304 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
     OpenAudioRendererImpl(ctx);
 }
 
+static u64 CalculateNumPerformanceEntries(const AudioCore::AudioRendererParameter& params) {
+    // +1 represents the final mix.
+    return u64{params.effect_count} + params.submix_count + params.sink_count + params.voice_count +
+           1;
+}
+
 void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
-    auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
     LOG_DEBUG(Service_Audio, "called");
 
-    u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
-    buffer_sz += params.submix_count * 1024;
-    buffer_sz += 0x940 * (params.submix_count + 1);
-    buffer_sz += 0x3F0 * params.voice_count;
-    buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
-    buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
-    buffer_sz += Common::AlignUp(
-        (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
-            (params.mix_buffer_count + 6),
-        0x40);
-
-    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        const u32 count = params.submix_count + 1;
-        u64 node_count = Common::AlignUp(count, 0x40);
-        const u64 node_state_buffer_sz =
-            4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
-        u64 edge_matrix_buffer_sz = 0;
-        node_count = Common::AlignUp(count * count, 0x40);
-        if (node_count >> 31 != 0) {
-            edge_matrix_buffer_sz = (node_count | 7) / 8;
-        } else {
-            edge_matrix_buffer_sz = node_count / 8;
+    // Several calculations below align the sizes being calculated
+    // onto a 64 byte boundary.
+    static constexpr u64 buffer_alignment_size = 64;
+
+    // Some calculations that calculate portions of the buffer
+    // that will contain information, on the other hand, align
+    // the result of some of their calculations on a 16 byte boundary.
+    static constexpr u64 info_field_alignment_size = 16;
+
+    // Maximum detail entries that may exist at one time for performance
+    // frame statistics.
+    static constexpr u64 max_perf_detail_entries = 100;
+
+    // Size of the data structure representing the bulk of the voice-related state.
+    static constexpr u64 voice_state_size = 0x100;
+
+    // Size of the upsampler manager data structure
+    constexpr u64 upsampler_manager_size = 0x48;
+
+    // Calculates the part of the size that relates to mix buffers.
+    const auto calculate_mix_buffer_sizes = [](const AudioCore::AudioRendererParameter& params) {
+        // As of 8.0.0 this is the maximum number of voice channels.
+        constexpr u64 max_voice_channels = 6;
+
+        // The service expects the sample_count member of the parameters to either be
+        // a value of 160 or 240, so the maximum sample count is assumed in order
+        // to adequately handle all values at runtime.
+        constexpr u64 default_max_sample_count = 240;
+
+        const u64 total_mix_buffers = params.mix_buffer_count + max_voice_channels;
+
+        u64 size = 0;
+        size += total_mix_buffers * (sizeof(s32) * params.sample_count);
+        size += total_mix_buffers * (sizeof(s32) * default_max_sample_count);
+        size += u64{params.submix_count} + params.sink_count;
+        size = Common::AlignUp(size, buffer_alignment_size);
+        size += Common::AlignUp(params.unknown_30, buffer_alignment_size);
+        size += Common::AlignUp(sizeof(s32) * params.mix_buffer_count, buffer_alignment_size);
+        return size;
+    };
+
+    // Calculates the portion of the size related to the mix data (and the sorting thereof).
+    const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) {
+        // The size of the mixing info data structure.
+        constexpr u64 mix_info_size = 0x940;
+
+        // Consists of total submixes with the final mix included.
+        const u64 total_mix_count = u64{params.submix_count} + 1;
+
+        // The total number of effects that may be available to the audio renderer at any time.
+        constexpr u64 max_effects = 256;
+
+        // Calculates the part of the size related to the audio node state.
+        // This will only be used if the audio revision supports the splitter.
+        const auto calculate_node_state_size = [](std::size_t num_nodes) {
+            // Internally within a nodestate, it appears to use a data structure
+            // similar to a std::bitset<64> twice.
+            constexpr u64 bit_size = Common::BitSize<u64>();
+            constexpr u64 num_bitsets = 2;
+
+            // Node state instances have three states internally for performing
+            // depth-first searches of nodes. Initialized, Found, and Done Sorting.
+            constexpr u64 num_states = 3;
+
+            u64 size = 0;
+            size += (num_nodes * num_nodes) * sizeof(s32);
+            size += num_states * (num_nodes * sizeof(s32));
+            size += num_bitsets * (Common::AlignUp(num_nodes, bit_size) / Common::BitSize<u8>());
+            return size;
+        };
+
+        // Calculates the part of the size related to the adjacency (aka edge) matrix.
+        const auto calculate_edge_matrix_size = [](std::size_t num_nodes) {
+            return (num_nodes * num_nodes) * sizeof(s32);
+        };
+
+        u64 size = 0;
+        size += Common::AlignUp(sizeof(void*) * total_mix_count, info_field_alignment_size);
+        size += Common::AlignUp(mix_info_size * total_mix_count, info_field_alignment_size);
+        size += Common::AlignUp(sizeof(s32) * max_effects * params.submix_count,
+                                info_field_alignment_size);
+
+        if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
+            size += Common::AlignUp(calculate_node_state_size(total_mix_count) +
+                                        calculate_edge_matrix_size(total_mix_count),
+                                    info_field_alignment_size);
         }
-        buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10);
-    }
 
-    buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
-    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        buffer_sz += 0xE0 * params.num_splitter_send_channels;
-        buffer_sz += 0x20 * params.splitter_count;
-        buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
-    }
-    buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
-    u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
-                    ((params.voice_count * 256) | 0x40);
-
-    if (params.performance_frame_count >= 1) {
-        output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
-                                      16 * params.voice_count + 16) +
-                                     0x658) *
-                                            (params.performance_frame_count + 1) +
-                                        0xc0,
-                                    0x40) +
-                    output_sz;
-    }
-    output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000);
+        return size;
+    };
 
-    IPC::ResponseBuilder rb{ctx, 4};
+    // Calculates the part of the size related to voice channel info.
+    const auto calculate_voice_info_size = [](const AudioCore::AudioRendererParameter& params) {
+        constexpr u64 voice_info_size = 0x220;
+        constexpr u64 voice_resource_size = 0xD0;
+
+        u64 size = 0;
+        size += Common::AlignUp(sizeof(void*) * params.voice_count, info_field_alignment_size);
+        size += Common::AlignUp(voice_info_size * params.voice_count, info_field_alignment_size);
+        size +=
+            Common::AlignUp(voice_resource_size * params.voice_count, info_field_alignment_size);
+        size += Common::AlignUp(voice_state_size * params.voice_count, info_field_alignment_size);
+        return size;
+    };
+
+    // Calculates the part of the size related to memory pools.
+    const auto calculate_memory_pools_size = [](const AudioCore::AudioRendererParameter& params) {
+        const u64 num_memory_pools = sizeof(s32) * (u64{params.effect_count} + params.voice_count);
+        const u64 memory_pool_info_size = 0x20;
+        return Common::AlignUp(num_memory_pools * memory_pool_info_size, info_field_alignment_size);
+    };
+
+    // Calculates the part of the size related to the splitter context.
+    const auto calculate_splitter_context_size =
+        [this](const AudioCore::AudioRendererParameter& params) -> u64 {
+        if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
+            return 0;
+        }
+
+        constexpr u64 splitter_info_size = 0x20;
+        constexpr u64 splitter_destination_data_size = 0xE0;
+
+        u64 size = 0;
+        size += params.num_splitter_send_channels;
+        size +=
+            Common::AlignUp(splitter_info_size * params.splitter_count, info_field_alignment_size);
+        size += Common::AlignUp(splitter_destination_data_size * params.num_splitter_send_channels,
+                                info_field_alignment_size);
+
+        return size;
+    };
+
+    // Calculates the part of the size related to the upsampler info.
+    const auto calculate_upsampler_info_size = [](const AudioCore::AudioRendererParameter& params) {
+        constexpr u64 upsampler_info_size = 0x280;
+        // Yes, using the buffer size over info alignment size is intentional here.
+        return Common::AlignUp(upsampler_info_size * (u64{params.submix_count} + params.sink_count),
+                               buffer_alignment_size);
+    };
+
+    // Calculates the part of the size related to effect info.
+    const auto calculate_effect_info_size = [](const AudioCore::AudioRendererParameter& params) {
+        constexpr u64 effect_info_size = 0x2B0;
+        return Common::AlignUp(effect_info_size * params.effect_count, info_field_alignment_size);
+    };
+
+    // Calculates the part of the size related to audio sink info.
+    const auto calculate_sink_info_size = [](const AudioCore::AudioRendererParameter& params) {
+        const u64 sink_info_size = 0x170;
+        return Common::AlignUp(sink_info_size * params.sink_count, info_field_alignment_size);
+    };
+
+    // Calculates the part of the size related to voice state info.
+    const auto calculate_voice_state_size = [](const AudioCore::AudioRendererParameter& params) {
+        const u64 voice_state_size = 0x100;
+        const u64 additional_size = buffer_alignment_size - 1;
+        return Common::AlignUp(voice_state_size * params.voice_count + additional_size,
+                               info_field_alignment_size);
+    };
+
+    // Calculates the part of the size related to performance statistics.
+    const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) {
+        // Extra size value appended to the end of the calculation.
+        constexpr u64 appended = 128;
+
+        // Whether or not we assume the newer version of performance metrics data structures.
+        const bool is_v2 =
+            IsFeatureSupported(AudioFeatures::PerformanceMetricsVersion2, params.revision);
+
+        // Data structure sizes
+        constexpr u64 perf_statistics_size = 0x0C;
+        const u64 header_size = is_v2 ? 0x30 : 0x18;
+        const u64 entry_size = is_v2 ? 0x18 : 0x10;
+        const u64 detail_size = is_v2 ? 0x18 : 0x10;
+
+        const u64 entry_count = CalculateNumPerformanceEntries(params);
+        const u64 size_per_frame =
+            header_size + (entry_size * entry_count) + (detail_size * max_perf_detail_entries);
+
+        u64 size = 0;
+        size += Common::AlignUp(size_per_frame * params.performance_frame_count + 1,
+                                buffer_alignment_size);
+        size += Common::AlignUp(perf_statistics_size, buffer_alignment_size);
+        size += appended;
+        return size;
+    };
+
+    // Calculates the part of the size that relates to the audio command buffer.
+    const auto calculate_command_buffer_size =
+        [this](const AudioCore::AudioRendererParameter& params) {
+            constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
+
+            if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) {
+                constexpr u64 command_buffer_size = 0x18000;
+
+                return command_buffer_size + alignment;
+            }
+
+            // When the variadic command buffer is supported, this means
+            // the command generator for the audio renderer can issue commands
+            // that are (as one would expect), variable in size. So what we need to do
+            // is determine the maximum possible size for a few command data structures
+            // then multiply them by the amount of present commands indicated by the given
+            // respective audio parameters.
+
+            constexpr u64 max_biquad_filters = 2;
+            constexpr u64 max_mix_buffers = 24;
+
+            constexpr u64 biquad_filter_command_size = 0x2C;
+
+            constexpr u64 depop_mix_command_size = 0x24;
+            constexpr u64 depop_setup_command_size = 0x50;
+
+            constexpr u64 effect_command_max_size = 0x540;
+
+            constexpr u64 mix_command_size = 0x1C;
+            constexpr u64 mix_ramp_command_size = 0x24;
+            constexpr u64 mix_ramp_grouped_command_size = 0x13C;
+
+            constexpr u64 perf_command_size = 0x28;
+
+            constexpr u64 sink_command_size = 0x130;
+
+            constexpr u64 submix_command_max_size =
+                depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers;
+
+            constexpr u64 volume_command_size = 0x1C;
+            constexpr u64 volume_ramp_command_size = 0x20;
+
+            constexpr u64 voice_biquad_filter_command_size =
+                biquad_filter_command_size * max_biquad_filters;
+            constexpr u64 voice_data_command_size = 0x9C;
+            const u64 voice_command_max_size =
+                (params.splitter_count * depop_setup_command_size) +
+                (voice_data_command_size + voice_biquad_filter_command_size +
+                 volume_ramp_command_size + mix_ramp_grouped_command_size);
+
+            // Now calculate the individual elements that comprise the size and add them together.
+            const u64 effect_commands_size = params.effect_count * effect_command_max_size;
+
+            const u64 final_mix_commands_size =
+                depop_mix_command_size + volume_command_size * max_mix_buffers;
 
+            const u64 perf_commands_size =
+                perf_command_size *
+                (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
+
+            const u64 sink_commands_size = params.sink_count * sink_command_size;
+
+            const u64 splitter_commands_size =
+                params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size;
+
+            const u64 submix_commands_size = params.submix_count * submix_command_max_size;
+
+            const u64 voice_commands_size = params.voice_count * voice_command_max_size;
+
+            return effect_commands_size + final_mix_commands_size + perf_commands_size +
+                   sink_commands_size + splitter_commands_size + submix_commands_size +
+                   voice_commands_size + alignment;
+        };
+
+    IPC::RequestParser rp{ctx};
+    const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
+
+    u64 size = 0;
+    size += calculate_mix_buffer_sizes(params);
+    size += calculate_mix_info_size(params);
+    size += calculate_voice_info_size(params);
+    size += upsampler_manager_size;
+    size += calculate_memory_pools_size(params);
+    size += calculate_splitter_context_size(params);
+
+    size = Common::AlignUp(size, buffer_alignment_size);
+
+    size += calculate_upsampler_info_size(params);
+    size += calculate_effect_info_size(params);
+    size += calculate_sink_info_size(params);
+    size += calculate_voice_state_size(params);
+    size += calculate_perf_size(params);
+    size += calculate_command_buffer_size(params);
+
+    // Finally, align the size to a 4KB page boundary, and we're done.
+    size = Common::AlignUp(size, 4096);
+
+    IPC::ResponseBuilder rb{ctx, 4};
     rb.Push(RESULT_SUCCESS);
-    rb.Push<u64>(output_sz);
+    rb.Push<u64>(size);
 
-    LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
+    LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", size);
 }
 
 void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
@@ -357,10 +598,15 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
 }
 
 bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
-    u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
+    // Byte swap
+    const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
+
     switch (feature) {
     case AudioFeatures::Splitter:
-        return version_num >= 2u;
+        return version_num >= 2U;
+    case AudioFeatures::PerformanceMetricsVersion2:
+    case AudioFeatures::VariadicCommandBuffer:
+        return version_num >= 5U;
     default:
         return false;
     }
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index e55d25973..1d3c8df61 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -28,6 +28,8 @@ private:
 
     enum class AudioFeatures : u32 {
         Splitter,
+        PerformanceMetricsVersion2,
+        VariadicCommandBuffer,
     };
 
     bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;
diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp
index 974ff8e1a..3c7ca2c44 100644
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -34,8 +34,8 @@ public:
         RegisterHandlers(functions);
 
         auto& kernel = Core::System::GetInstance().Kernel();
-        register_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
-                                                                "BT:RegisterEvent");
+        register_event = Kernel::WritableEvent::CreateEventPair(
+            kernel, Kernel::ResetType::Automatic, "BT:RegisterEvent");
     }
 
 private:
diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp
index 4f15c3f19..b439ee7ec 100644
--- a/src/core/hle/service/btm/btm.cpp
+++ b/src/core/hle/service/btm/btm.cpp
@@ -57,13 +57,13 @@ public:
         RegisterHandlers(functions);
 
         auto& kernel = Core::System::GetInstance().Kernel();
-        scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+        scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
                                                             "IBtmUserCore:ScanEvent");
         connection_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::OneShot, "IBtmUserCore:ConnectionEvent");
+            kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ConnectionEvent");
         service_discovery = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::OneShot, "IBtmUserCore:Discovery");
-        config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+            kernel, Kernel::ResetType::Automatic, "IBtmUserCore:Discovery");
+        config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
                                                               "IBtmUserCore:ConfigEvent");
     }
 
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index e7fc7a619..fdd6d79a2 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -170,7 +170,7 @@ void Controller_NPad::InitNewlyAddedControler(std::size_t controller_idx) {
 void Controller_NPad::OnInit() {
     auto& kernel = Core::System::GetInstance().Kernel();
     styleset_changed_event = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::OneShot, "npad:NpadStyleSetChanged");
+        kernel, Kernel::ResetType::Automatic, "npad:NpadStyleSetChanged");
 
     if (!IsControllerActivated()) {
         return;
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index c6babdd4d..a5cb06f8a 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -26,7 +26,7 @@ constexpr ResultCode ERR_NO_APPLICATION_AREA(ErrorModule::NFP, 152);
 Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
     : ServiceFramework(name), module(std::move(module)) {
     auto& kernel = Core::System::GetInstance().Kernel();
-    nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+    nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
                                                           "IUser:NFCTagDetected");
 }
 
@@ -67,9 +67,9 @@ public:
 
         auto& kernel = Core::System::GetInstance().Kernel();
         deactivate_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
+            kernel, Kernel::ResetType::Automatic, "IUser:DeactivateEvent");
         availability_change_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
+            kernel, Kernel::ResetType::Automatic, "IUser:AvailabilityChangeEvent");
     }
 
 private:
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index f92571008..76b12b482 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -62,9 +62,9 @@ public:
         RegisterHandlers(functions);
 
         auto& kernel = Core::System::GetInstance().Kernel();
-        event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+        event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
                                                         "IRequest:Event1");
-        event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+        event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
                                                         "IRequest:Event2");
     }
 
diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp
index 0dabcd23b..f319a3ca1 100644
--- a/src/core/hle/service/nim/nim.cpp
+++ b/src/core/hle/service/nim/nim.cpp
@@ -141,7 +141,7 @@ public:
 
         auto& kernel = Core::System::GetInstance().Kernel();
         finished_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::OneShot,
+            kernel, Kernel::ResetType::Automatic,
             "IEnsureNetworkClockAvailabilityService:FinishEvent");
     }
 
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index 3b9ab4b14..b60fc748b 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -129,7 +129,7 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
     RegisterHandlers(functions);
 
     auto& kernel = Core::System::GetInstance().Kernel();
-    query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+    query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
                                                          "NVDRV::query_event");
 }
 
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 4d150fc71..5731e815f 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -16,7 +16,7 @@ namespace Service::NVFlinger {
 
 BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) {
     auto& kernel = Core::System::GetInstance().Kernel();
-    buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
+    buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
                                                                "BufferQueue NativeHandle");
 }
 
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index 4ecb6bcef..298d85011 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -2,16 +2,15 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <chrono>
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/client_session.h"
 #include "core/hle/service/set/set.h"
 #include "core/settings.h"
 
 namespace Service::Set {
-
+namespace {
 constexpr std::array<LanguageCode, 17> available_language_codes = {{
     LanguageCode::JA,
     LanguageCode::EN_US,
@@ -32,41 +31,35 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{
     LanguageCode::ZH_HANT,
 }};
 
-constexpr std::size_t pre4_0_0_max_entries = 0xF;
-constexpr std::size_t post4_0_0_max_entries = 0x40;
+constexpr std::size_t pre4_0_0_max_entries = 15;
+constexpr std::size_t post4_0_0_max_entries = 17;
 
 constexpr ResultCode ERR_INVALID_LANGUAGE{ErrorModule::Settings, 625};
 
-LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
-    return available_language_codes.at(index);
+void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_language_codes) {
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(RESULT_SUCCESS);
+    rb.Push(static_cast<u32>(num_language_codes));
 }
 
-template <std::size_t size>
-static std::array<LanguageCode, size> MakeLanguageCodeSubset() {
-    std::array<LanguageCode, size> arr;
-    std::copy_n(available_language_codes.begin(), size, arr.begin());
-    return arr;
+void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_size) {
+    const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode);
+    const std::size_t copy_amount = std::min(requested_amount, max_size);
+    const std::size_t copy_size = copy_amount * sizeof(LanguageCode);
+
+    ctx.WriteBuffer(available_language_codes.data(), copy_size);
+    PushResponseLanguageCode(ctx, copy_amount);
 }
+} // Anonymous namespace
 
-static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t max_size) {
-    IPC::ResponseBuilder rb{ctx, 3};
-    rb.Push(RESULT_SUCCESS);
-    if (available_language_codes.size() > max_size) {
-        rb.Push(static_cast<u32>(max_size));
-    } else {
-        rb.Push(static_cast<u32>(available_language_codes.size()));
-    }
+LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
+    return available_language_codes.at(index);
 }
 
 void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_SET, "called");
 
-    if (available_language_codes.size() > pre4_0_0_max_entries) {
-        ctx.WriteBuffer(MakeLanguageCodeSubset<pre4_0_0_max_entries>());
-    } else {
-        ctx.WriteBuffer(available_language_codes);
-    }
-    PushResponseLanguageCode(ctx, pre4_0_0_max_entries);
+    GetAvailableLanguageCodesImpl(ctx, pre4_0_0_max_entries);
 }
 
 void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
@@ -87,12 +80,7 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
 void SET::GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_SET, "called");
 
-    if (available_language_codes.size() > post4_0_0_max_entries) {
-        ctx.WriteBuffer(MakeLanguageCodeSubset<post4_0_0_max_entries>());
-    } else {
-        ctx.WriteBuffer(available_language_codes);
-    }
-    PushResponseLanguageCode(ctx, post4_0_0_max_entries);
+    GetAvailableLanguageCodesImpl(ctx, post4_0_0_max_entries);
 }
 
 void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
@@ -102,9 +90,9 @@ void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
 }
 
 void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) {
-    PushResponseLanguageCode(ctx, post4_0_0_max_entries);
-
     LOG_DEBUG(Service_SET, "called");
+
+    PushResponseLanguageCode(ctx, post4_0_0_max_entries);
 }
 
 void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
index 01d80311b..a8d088305 100644
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -17,7 +17,7 @@ namespace Service::VI {
 
 Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
     auto& kernel = Core::System::GetInstance().Kernel();
-    vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
+    vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
                                                          fmt::format("Display VSync Event {}", id));
 }
 
diff --git a/src/core/memory.h b/src/core/memory.h
index b9fa18b1d..04e2c5f1d 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr);
 
 std::string ReadCString(VAddr vaddr, std::size_t max_length);
 
-enum class FlushMode {
-    /// Write back modified surfaces to RAM
-    Flush,
-    /// Remove region from the cache
-    Invalidate,
-    /// Write back modified surfaces to RAM, and also remove them from the cache
-    FlushAndInvalidate,
-};
-
 /**
  * Mark each page touching the region as cached.
  */
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index e1db06811..4b17bada5 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) {
 }
 
 TelemetrySession::TelemetrySession() {
-#ifdef ENABLE_WEB_SERVICE
-    backend = std::make_unique<WebService::TelemetryJson>(
-        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
-#else
-    backend = std::make_unique<Telemetry::NullVisitor>();
-#endif
     // Log one-time top-level information
     AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId());
 
@@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() {
                                 .count()};
     AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time);
 
+#ifdef ENABLE_WEB_SERVICE
+    auto backend = std::make_unique<WebService::TelemetryJson>(
+        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
+#else
+    auto backend = std::make_unique<Telemetry::NullVisitor>();
+#endif
+
     // Complete the session, submitting to web service if necessary
-    // This is just a placeholder to wrap up the session once the core completes and this is
-    // destroyed. This will be moved elsewhere once we are actually doing real I/O with the service.
     field_collection.Accept(*backend);
     if (Settings::values.enable_telemetry)
         backend->Complete();
@@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() {
 
 bool TelemetrySession::SubmitTestcase() {
 #ifdef ENABLE_WEB_SERVICE
+    auto backend = std::make_unique<WebService::TelemetryJson>(
+        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
     field_collection.Accept(*backend);
     return backend->SubmitTestcase();
 #else
diff --git a/src/core/telemetry_session.h b/src/core/telemetry_session.h
index 023612b79..cae5a45a0 100644
--- a/src/core/telemetry_session.h
+++ b/src/core/telemetry_session.h
@@ -39,7 +39,6 @@ public:
 
 private:
     Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
-    std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
 };
 
 /**
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6821f275d..1e010e4da 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
     dma_pusher.h
     debug_utils/debug_utils.cpp
     debug_utils/debug_utils.h
+    engines/engine_upload.cpp
+    engines/engine_upload.h
     engines/fermi_2d.cpp
     engines/fermi_2d.h
     engines/kepler_compute.cpp
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 036e66f05..3175579cc 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -40,6 +40,13 @@ bool DmaPusher::Step() {
     }
 
     const CommandList& command_list{dma_pushbuffer.front()};
+    ASSERT_OR_EXECUTE(!command_list.empty(), {
+        // Somehow the command_list is empty, in order to avoid a crash
+        // We ignore it and assume its size is 0.
+        dma_pushbuffer.pop();
+        dma_pushbuffer_subindex = 0;
+        return true;
+    });
     const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
     GPUVAddr dma_get = command_list_header.addr;
     GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
new file mode 100644
index 000000000..082a40cd9
--- /dev/null
+++ b/src/video_core/engines/engine_upload.cpp
@@ -0,0 +1,61 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+
+#include "common/assert.h"
+#include "video_core/engines/engine_upload.h"
+#include "video_core/memory_manager.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra::Engines::Upload {
+
+State::State(MemoryManager& memory_manager, Registers& regs)
+    : regs{regs}, memory_manager{memory_manager} {}
+
+State::~State() = default;
+
+/// Begins a new upload: resets the write cursor and sizes the staging buffer from the registers.
+void State::ProcessExec(const bool is_linear) {
+    write_offset = 0;
+    copy_size = regs.line_length_in * regs.line_count;
+    inner_buffer.resize(copy_size);
+    this->is_linear = is_linear;
+}
+
+/// Appends one data word to the staging buffer. When is_last_call is set, the staged bytes are
+/// written to regs.dest, either linearly or swizzled via SwizzleKepler.
+void State::ProcessData(const u32 data, const bool is_last_call) {
+    // Clamp to the bytes still expected so the final, partially used word cannot overrun.
+    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
+    if (sub_copy_size != 0) {
+        // Only touch the buffer when there is room: it may be empty or already full here.
+        std::memcpy(inner_buffer.data() + write_offset, &data, sub_copy_size);
+        write_offset += sub_copy_size;
+    }
+    if (!is_last_call) {
+        return;
+    }
+    const GPUVAddr address{regs.dest.Address()};
+    if (is_linear) {
+        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+    } else {
+        UNIMPLEMENTED_IF(regs.dest.z != 0);
+        UNIMPLEMENTED_IF(regs.dest.depth != 1);
+        UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
+        UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
+        const std::size_t dst_size = Tegra::Texture::CalculateSize(
+            true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
+        tmp_buffer.resize(dst_size);
+        // Fetch the current destination contents, swizzle the staged data into them, write back.
+        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+        Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
+                                      regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
+                                      tmp_buffer.data());
+        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+    }
+}
+
+} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
new file mode 100644
index 000000000..ef4f5839a
--- /dev/null
+++ b/src/video_core/engines/engine_upload.h
@@ -0,0 +1,73 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines::Upload {
+
+struct Registers { // Upload registers embedded in engine register files, read by Upload::State
+    u32 line_length_in; ///< Multiplied by line_count to get the upload size in bytes
+    u32 line_count;     ///< Multiplied by line_length_in to get the upload size in bytes
+
+    struct {
+        u32 address_high; ///< Upper 32 bits of the destination GPU virtual address
+        u32 address_low;  ///< Lower 32 bits of the destination GPU virtual address
+        u32 pitch;
+        union {
+            BitField<0, 4, u32> block_width;  ///< log2 of BlockWidth()
+            BitField<4, 4, u32> block_height; ///< log2 of BlockHeight()
+            BitField<8, 4, u32> block_depth;  ///< log2 of BlockDepth()
+        };
+        u32 width;  ///< Passed to CalculateSize and SwizzleKepler (non-linear uploads)
+        u32 height; ///< Passed to CalculateSize and SwizzleKepler (non-linear uploads)
+        u32 depth;  ///< Only 1 is implemented for non-linear uploads
+        u32 z;      ///< Only 0 is implemented for non-linear uploads
+        u32 x;      ///< Forwarded to SwizzleKepler for non-linear uploads
+        u32 y;      ///< Forwarded to SwizzleKepler for non-linear uploads
+
+        GPUVAddr Address() const { // Joins address_high (bits 32+) and address_low (bits 0-31)
+            return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+        }
+
+        u32 BlockWidth() const { // 2 raised to the block_width field
+            return 1U << block_width.Value();
+        }
+
+        u32 BlockHeight() const { // 2 raised to the block_height field
+            return 1U << block_height.Value();
+        }
+
+        u32 BlockDepth() const { // 2 raised to the block_depth field
+            return 1U << block_depth.Value();
+        }
+    } dest;
+};
+
+class State { // Stages uploaded data words and writes them to regs.dest on the last data call
+public:
+    State(MemoryManager& memory_manager, Registers& regs);
+    ~State();
+
+    void ProcessExec(bool is_linear);              ///< Resets staging for a new upload
+    void ProcessData(u32 data, bool is_last_call); ///< Stages one word; flushes on the last call
+
+private:
+    u32 write_offset = 0;          ///< Bytes staged so far in inner_buffer
+    u32 copy_size = 0;             ///< line_length_in * line_count, set by ProcessExec
+    std::vector<u8> inner_buffer;  ///< Staging buffer, resized to copy_size by ProcessExec
+    std::vector<u8> tmp_buffer;    ///< Destination scratch buffer for non-linear uploads
+    bool is_linear = false;        ///< Value passed to the most recent ProcessExec
+    Registers& regs;               ///< Register block supplied by the engine at construction
+    MemoryManager& memory_manager; ///< Used to read and write the destination GPU memory
+};
+
+} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 2e51b7f13..45f59a4d9 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -21,6 +21,12 @@ class RasterizerInterface;
 
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as G80_2D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+ */
+
 #define FERMI2D_REG_INDEX(field_name)                                                              \
     (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
 
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index b1d950460..7404a8163 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -4,12 +4,21 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"
 
 namespace Tegra::Engines {
 
-KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
+KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                             MemoryManager& memory_manager)
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
+                                                                                  memory_manager,
+                                                                                  regs.upload} {}
 
 KeplerCompute::~KeplerCompute() = default;
 
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
     regs.reg_array[method_call.method] = method_call.argument;
 
     switch (method_call.method) {
+    case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    }
+    case KEPLER_COMPUTE_REG_INDEX(data_upload): {
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
+        break;
+    }
     case KEPLER_COMPUTE_REG_INDEX(launch):
-        // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
-        // kernels)
-        UNREACHABLE_MSG("Compute shaders are not implemented");
+        ProcessLaunch();
         break;
     default:
         break;
     }
 }
 
+/// Fetches the launch descriptor from launch_desc_loc and logs the kernel address (stubbed).
+void KeplerCompute::ProcessLaunch() {
+    constexpr std::size_t descriptor_size = LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32);
+    memory_manager.ReadBlockUnsafe(regs.launch_desc_loc.Address(), &launch_description,
+                                   descriptor_size);
+
+    const GPUVAddr kernel_entry = regs.code_loc.Address() + launch_description.program_start;
+    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", kernel_entry);
+}
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index fb6cdf432..5250b8d9b 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -6,22 +6,40 @@
 
 #include <array>
 #include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 
+namespace Core {
+class System;
+}
+
 namespace Tegra {
 class MemoryManager;
 }
 
+namespace VideoCore {
+class RasterizerInterface;
+}
+
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as GK104_Compute. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
+ */
+
 #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
 
 class KeplerCompute final {
 public:
-    explicit KeplerCompute(MemoryManager& memory_manager);
+    explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                           MemoryManager& memory_manager);
     ~KeplerCompute();
 
     static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,185 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0xAF);
+                INSERT_PADDING_WORDS(0x60);
+
+                Upload::Registers upload;
+
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS(0x3F);
+
+                struct {
+                    u32 address;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
+                    }
+                } launch_desc_loc;
+
+                INSERT_PADDING_WORDS(0x1);
 
                 u32 launch;
 
-                INSERT_PADDING_WORDS(0xC48);
+                INSERT_PADDING_WORDS(0x4A7);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tsc;
+
+                INSERT_PADDING_WORDS(0x3);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tic;
+
+                INSERT_PADDING_WORDS(0x22);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } code_loc;
+
+                INSERT_PADDING_WORDS(0x3FE);
+
+                u32 texture_const_buffer_index;
+
+                INSERT_PADDING_WORDS(0x374);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
     } regs{};
+
+    /// Launch descriptor that ProcessLaunch reads from GPU memory at launch_desc_loc.
+    struct LaunchParams {
+        static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
+
+        INSERT_PADDING_WORDS(0x8);
+
+        u32 program_start;
+
+        INSERT_PADDING_WORDS(0x2);
+
+        BitField<30, 1, u32> linked_tsc;
+
+        BitField<0, 31, u32> grid_dim_x;
+        union {
+            BitField<0, 16, u32> grid_dim_y;
+            BitField<16, 16, u32> grid_dim_z;
+        };
+
+        INSERT_PADDING_WORDS(0x3);
+
+        BitField<0, 16, u32> shared_alloc;
+
+        BitField<0, 31, u32> block_dim_x;
+        union {
+            BitField<0, 16, u32> block_dim_y;
+            BitField<16, 16, u32> block_dim_z;
+        };
+
+        union {
+            BitField<0, 8, u32> const_buffer_enable_mask;
+            BitField<29, 2, u32> cache_layout;
+        } memory_config;
+
+        INSERT_PADDING_WORDS(0x8);
+
+        struct {
+            u32 address_low;
+            union {
+                BitField<0, 8, u32> address_high;
+                BitField<15, 17, u32> size;
+            };
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
+                                             address_low);
+            }
+        } const_buffer_config[8];
+
+        union {
+            BitField<0, 20, u32> local_pos_alloc;
+            BitField<27, 5, u32> barrier_alloc;
+        };
+
+        union {
+            BitField<0, 20, u32> local_neg_alloc;
+            BitField<24, 5, u32> gpr_alloc;
+        };
+
+        INSERT_PADDING_WORDS(0x11);
+    } launch_description;
+
+    /// NOTE(review): not referenced anywhere in this change; uploads use upload_state instead.
+    struct {
+        u32 write_offset = 0;
+        u32 copy_size = 0;
+        std::vector<u8> inner_buffer;
+    } state{};
+
     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
                   "KeplerCompute Regs has wrong size");
 
+    static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
+                  "KeplerCompute LaunchParams has wrong size");
+
     /// Write the value to the register identified by method.
     void CallMethod(const GPU::MethodCall& method_call);
 
 private:
+    Core::System& system;
+    VideoCore::RasterizerInterface& rasterizer;
     MemoryManager& memory_manager;
+    Upload::State upload_state;
+
+    /// Reads the launch descriptor and logs the kernel address; execution itself is stubbed.
+    void ProcessLaunch();
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
     static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4,                       \
                   "Field " #field_name " has invalid position")
 
+#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position)                                         \
+    static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4,               \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(launch, 0xAF);
+ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(code_loc, 0x582);
+ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
+ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
+ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
+ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
+ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
+ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
 
 #undef ASSERT_REG_POSITION
+#undef ASSERT_LAUNCH_PARAM_POSITION
 
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 7387886a3..0561f676c 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,9 +14,8 @@
 
 namespace Tegra::Engines {
 
-KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                           MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
+KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
+    : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
 
 KeplerMemory::~KeplerMemory() = default;
 
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
 
     switch (method_call.method) {
     case KEPLERMEMORY_REG_INDEX(exec): {
-        ProcessExec();
+        upload_state.ProcessExec(regs.exec.linear != 0);
         break;
     }
     case KEPLERMEMORY_REG_INDEX(data): {
-        ProcessData(method_call.argument, method_call.IsLastCall());
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
         break;
     }
     }
 }
 
-void KeplerMemory::ProcessExec() {
-    state.write_offset = 0;
-    state.copy_size = regs.line_length_in * regs.line_count;
-    state.inner_buffer.resize(state.copy_size);
-}
-
-void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
-    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
-    std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
-    state.write_offset += sub_copy_size;
-    if (is_last_call) {
-        const GPUVAddr address{regs.dest.Address()};
-        if (regs.exec.linear != 0) {
-            memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
-        } else {
-            UNIMPLEMENTED_IF(regs.dest.z != 0);
-            UNIMPLEMENTED_IF(regs.dest.depth != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
-            const std::size_t dst_size = Tegra::Texture::CalculateSize(
-                true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
-            std::vector<u8> tmp_buffer(dst_size);
-            memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
-            Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
-                                          regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
-                                          state.inner_buffer.data(), tmp_buffer.data());
-            memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
-        }
-        system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
-    }
-}
-
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5f892ddad..f3bc675a9 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 
 namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
 class MemoryManager;
 }
 
-namespace VideoCore {
-class RasterizerInterface;
-}
-
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as P2MF. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
+ */
+
 #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
     (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
 
 class KeplerMemory final {
 public:
-    KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                 MemoryManager& memory_manager);
+    KeplerMemory(Core::System& system, MemoryManager& memory_manager);
     ~KeplerMemory();
 
     /// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
             struct {
                 INSERT_PADDING_WORDS(0x60);
 
-                u32 line_length_in;
-                u32 line_count;
-
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    u32 pitch;
-                    union {
-                        BitField<0, 4, u32> block_width;
-                        BitField<4, 4, u32> block_height;
-                        BitField<8, 4, u32> block_depth;
-                    };
-                    u32 width;
-                    u32 height;
-                    u32 depth;
-                    u32 z;
-                    u32 x;
-                    u32 y;
-
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-
-                    u32 BlockWidth() const {
-                        return 1U << block_width.Value();
-                    }
-
-                    u32 BlockHeight() const {
-                        return 1U << block_height.Value();
-                    }
-
-                    u32 BlockDepth() const {
-                        return 1U << block_depth.Value();
-                    }
-                } dest;
+                Upload::Registers upload;
 
                 struct {
                     union {
@@ -96,28 +63,17 @@ public:
         };
     } regs{};
 
-    struct {
-        u32 write_offset = 0;
-        u32 copy_size = 0;
-        std::vector<u8> inner_buffer;
-    } state{};
-
 private:
     Core::System& system;
-    VideoCore::RasterizerInterface& rasterizer;
     MemoryManager& memory_manager;
-
-    void ProcessExec();
-    void ProcessData(u32 data, bool is_last_call);
+    Upload::State upload_state;
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
     static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \
                   "Field " #field_name " has invalid position")
 
-ASSERT_REG_POSITION(line_length_in, 0x60);
-ASSERT_REG_POSITION(line_count, 0x61);
-ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(upload, 0x60);
 ASSERT_REG_POSITION(exec, 0x6C);
 ASSERT_REG_POSITION(data, 0x6D);
 #undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9780417f2..39968d403 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
 
 Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                      MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
-                                                                                  *this} {
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
+      macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
     InitializeRegisterDefaults();
 }
 
@@ -34,9 +34,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
 
     // Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
     // needed for ARMS.
-    for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
-        regs.viewports[viewport].depth_range_near = 0.0f;
-        regs.viewports[viewport].depth_range_far = 1.0f;
+    for (auto& viewport : regs.viewports) {
+        viewport.depth_range_near = 0.0f;
+        viewport.depth_range_far = 1.0f;
     }
 
     // Doom and Bomberman seems to use the uninitialized registers and just enable blend
@@ -47,13 +47,13 @@ void Maxwell3D::InitializeRegisterDefaults() {
     regs.blend.equation_a = Regs::Blend::Equation::Add;
     regs.blend.factor_source_a = Regs::Blend::Factor::One;
     regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
-    for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
-        regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
-        regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
-        regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
-        regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
-        regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
-        regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
+    for (auto& blend : regs.independent_blend) {
+        blend.equation_rgb = Regs::Blend::Equation::Add;
+        blend.factor_source_rgb = Regs::Blend::Factor::One;
+        blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
+        blend.equation_a = Regs::Blend::Equation::Add;
+        blend.factor_source_a = Regs::Blend::Factor::One;
+        blend.factor_dest_a = Regs::Blend::Factor::Zero;
     }
     regs.stencil_front_op_fail = Regs::StencilOp::Keep;
     regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
@@ -75,11 +75,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
 
     // TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
     // default of enabled fixes rendering here.
-    for (std::size_t color_mask = 0; color_mask < Regs::NumRenderTargets; color_mask++) {
-        regs.color_mask[color_mask].R.Assign(1);
-        regs.color_mask[color_mask].G.Assign(1);
-        regs.color_mask[color_mask].B.Assign(1);
-        regs.color_mask[color_mask].A.Assign(1);
+    for (auto& color_mask : regs.color_mask) {
+        color_mask.R.Assign(1);
+        color_mask.G.Assign(1);
+        color_mask.B.Assign(1);
+        color_mask.A.Assign(1);
     }
 
     // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
@@ -178,13 +178,13 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 
         // Vertex buffer
         if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
             dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
         } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
             dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
         } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
             dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
         }
     }
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         ProcessSyncPoint();
         break;
     }
+    case MAXWELL3D_REG_INDEX(exec_upload): {
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    }
+    case MAXWELL3D_REG_INDEX(data_upload): {
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            dirty_flags.OnMemoryWrite();
+        }
+        break;
+    }
     default:
         break;
     }
@@ -430,7 +442,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     const auto a_type = tic_entry.a_type.Value();
 
     // TODO(Subv): Different data types for separate components are not supported
-    ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
+    DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
 
     return tic_entry;
 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index b1e640dd1..f342c78e6 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <bitset>
+#include <type_traits>
 #include <unordered_map>
 #include <vector>
 
@@ -14,6 +15,7 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 #include "video_core/macro_interpreter.h"
 #include "video_core/textures/texture.h"
@@ -32,6 +34,12 @@ class RasterizerInterface;
 
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as GF100_3D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+ */
+
 #define MAXWELL3D_REG_INDEX(field_name)                                                            \
     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
 
@@ -581,7 +589,18 @@ public:
                     u32 bind;
                 } macros;
 
-                INSERT_PADDING_WORDS(0x69);
+                INSERT_PADDING_WORDS(0x17);
+
+                Upload::Registers upload;
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS(0x44);
 
                 struct {
                     union {
@@ -1090,6 +1109,7 @@ public:
     } regs{};
 
     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
+    static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
 
     struct State {
         struct ConstBufferInfo {
@@ -1177,6 +1197,8 @@ private:
     /// Interpreter for the macro codes uploaded to the GPU.
     MacroInterpreter macro_interpreter;
 
+    Upload::State upload_state;
+
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
@@ -1220,6 +1242,9 @@ private:
                   "Field " #field_name " has invalid position")
 
 ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2426d0067..3a5dfef0c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
 
     ASSERT(regs.exec.enable_2d == 1);
 
-    const std::size_t copy_size = regs.x_count * regs.y_count;
+    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+        ASSERT(regs.src_params.size_z == 1);
+        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+        const std::size_t src_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+            regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
 
-    auto source_ptr{memory_manager.GetPointer(source)};
-    auto dst_ptr{memory_manager.GetPointer(dest)};
+        const std::size_t dst_size = regs.dst_pitch * regs.y_count;
 
-    if (!source_ptr) {
-        LOG_ERROR(HW_GPU, "source_ptr is invalid");
-        return;
-    }
+        if (read_buffer.size() < src_size) {
+            read_buffer.resize(src_size);
+        }
 
-    if (!dst_ptr) {
-        LOG_ERROR(HW_GPU, "dst_ptr is invalid");
-        return;
-    }
+        if (write_buffer.size() < dst_size) {
+            write_buffer.resize(dst_size);
+        }
 
-    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
-        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
-        // copying.
-        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
+        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
 
-        // We have to invalidate the destination region to evict any outdated surfaces from the
-        // cache. We do this before actually writing the new data because the destination address
-        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
-    };
+        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
+                                  regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
+                                  write_buffer.data(), regs.src_params.BlockHeight(),
+                                  regs.src_params.pos_x, regs.src_params.pos_y);
 
-    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
-        ASSERT(regs.src_params.size_z == 1);
-        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
+    } else {
+        ASSERT(regs.dst_params.BlockDepth() == 1);
 
-        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+        const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
 
-        FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
-                           copy_size * src_bytes_per_pixel);
+        const std::size_t dst_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+            regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
 
-        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
-                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
-                                  regs.src_params.pos_y);
-    } else {
-        ASSERT(regs.dst_params.size_z == 1);
-        ASSERT(regs.src_pitch == regs.x_count);
+        const std::size_t dst_layer_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
+            regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
 
-        const u32 src_bpp = regs.src_pitch / regs.x_count;
+        const std::size_t src_size = regs.src_pitch * regs.y_count;
 
-        FlushAndInvalidate(regs.src_pitch * regs.y_count,
-                           regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
+        if (read_buffer.size() < src_size) {
+            read_buffer.resize(src_size);
+        }
+
+        if (write_buffer.size() < dst_size) {
+            write_buffer.resize(dst_size);
+        }
+
+        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
 
         // If the input is linear and the output is tiled, swizzle the input and copy it over.
         Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
+                                src_bytes_per_pixel,
+                                write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
+                                read_buffer.data(), regs.dst_params.BlockHeight());
+
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
     }
 }
 
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c6b649842..e5942f671 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <cstddef>
+#include <vector>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;
 
 namespace Tegra::Engines {
 
+/**
+ * This Engine is known as GK104_Copy. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
+ */
+
 class MaxwellDMA final {
 public:
     explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:
 
         static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
 
+        enum class ComponentMode : u32 {
+            Src0 = 0,
+            Src1 = 1,
+            Src2 = 2,
+            Src3 = 3,
+            Const0 = 4,
+            Const1 = 5,
+            Zero = 6,
+        };
+
         enum class CopyMode : u32 {
             None = 0,
             Unk1 = 1,
@@ -128,7 +144,26 @@ public:
                 u32 x_count;
                 u32 y_count;
 
-                INSERT_PADDING_WORDS(0xBB);
+                INSERT_PADDING_WORDS(0xB8);
+
+                u32 const0;
+                u32 const1;
+                union {
+                    BitField<0, 4, ComponentMode> component0;
+                    BitField<4, 4, ComponentMode> component1;
+                    BitField<8, 4, ComponentMode> component2;
+                    BitField<12, 4, ComponentMode> component3;
+                    BitField<16, 2, u32> component_size;
+                    BitField<20, 3, u32> src_num_components;
+                    BitField<24, 3, u32> dst_num_components;
+
+                    u32 SrcBytePerPixel() const {
+                        return src_num_components.Value() * component_size.Value();
+                    }
+                    u32 DstBytePerPixel() const {
+                        return dst_num_components.Value() * component_size.Value();
+                    }
+                } swizzle_config;
 
                 Parameters dst_params;
 
@@ -149,6 +184,9 @@ private:
 
     MemoryManager& memory_manager;
 
+    std::vector<u8> read_buffer;
+    std::vector<u8> write_buffer;
+
     /// Performs the copy from the source buffer to the destination buffer as configured in the
     /// registers.
     void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
 ASSERT_REG_POSITION(dst_pitch, 0x105);
 ASSERT_REG_POSITION(x_count, 0x106);
 ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(const0, 0x1C0);
+ASSERT_REG_POSITION(const1, 0x1C1);
+ASSERT_REG_POSITION(swizzle_config, 0x1C2);
 ASSERT_REG_POSITION(dst_params, 0x1C3);
 ASSERT_REG_POSITION(src_params, 0x1CA);
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4461083ff..52706505b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
     dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
     maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
     fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
-    kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
-    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
 }
 
 GPU::~GPU() = default;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c9a2077de..1e2ff46b0 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
                 renderer.Rasterizer().FlushRegion(data->addr, data->size);
             } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
                 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
-            } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+            } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
                 return;
             } else {
                 UNREACHABLE();
@@ -118,7 +118,7 @@ void SynchState::WaitForSynchronization(u64 fence) {
     // Wait for the GPU to be idle (all commands to be executed)
     {
         MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock<std::mutex> lock{synchronization_mutex};
+        std::unique_lock lock{synchronization_mutex};
         synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
     }
 }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cc14527c7..05a168a72 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -81,12 +81,6 @@ struct CommandDataContainer {
     CommandDataContainer(CommandData&& data, u64 next_fence)
         : data{std::move(data)}, fence{next_fence} {}
 
-    CommandDataContainer& operator=(const CommandDataContainer& t) {
-        data = std::move(t.data);
-        fence = t.fence;
-        return *this;
-    }
-
     CommandData data;
     u64 fence{};
 };
@@ -109,7 +103,7 @@ struct SynchState final {
 
     void TrySynchronize() {
         if (IsSynchronized()) {
-            std::lock_guard<std::mutex> lock{synchronization_mutex};
+            std::lock_guard lock{synchronization_mutex};
             synchronization_condition.notify_one();
         }
     }
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 524d9ea5a..fbea107ca 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -118,10 +118,10 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
                           static_cast<u32>(opcode.operation.Value()));
     }
 
-    if (opcode.is_exit) {
+    // An instruction with the Exit flag will not actually
+    // cause an exit if it's executed inside a delay slot.
+    if (opcode.is_exit && !is_delay_slot) {
         // Exit has a delay slot, execute the next instruction
-        // Note: Executing an exit during a branch delay slot will cause the instruction at the
-        // branch target to be executed before exiting.
         Step(offset, true);
         return false;
     }
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 6c98c6701..5d8d126c1 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste
     UpdatePageTableForVMA(initial_vma);
 }
 
+MemoryManager::~MemoryManager() = default;
+
 GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
     const u64 aligned_size{Common::AlignUp(size, page_size)};
     const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
@@ -199,11 +201,11 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
     return {};
 }
 
-bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
+bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
     const GPUVAddr end = start + size;
     const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
     const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
-    const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
+    const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
     return range == size;
 }
 
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index e4f0c4bd6..113f9d8f3 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -47,7 +47,8 @@ struct VirtualMemoryArea {
 
 class MemoryManager final {
 public:
-    MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    ~MemoryManager();
 
     GPUVAddr AllocateSpace(u64 size, u64 align);
     GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -65,18 +66,18 @@ public:
     u8* GetPointer(GPUVAddr addr);
     const u8* GetPointer(GPUVAddr addr) const;
 
-    // Returns true if the block is continous in host memory, false otherwise
-    bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
+    /// Returns true if the block is continuous in host memory, false otherwise
+    bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;
 
     /**
      * ReadBlock and WriteBlock are full read and write operations over virtual
-     * GPU Memory. It's important to use these when GPU memory may not be continous
+     * GPU Memory. It's important to use these when GPU memory may not be continuous
      * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
      * Flushes and Invalidations, respectively to each operation.
      */
-    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
-    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
-    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
     /**
      * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -88,9 +89,9 @@ public:
      * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
      * being flushed.
      */
-    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
-    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
-    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
 private:
     using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
@@ -111,10 +112,10 @@ private:
     /**
      * Maps an unmanaged host memory pointer at a given address.
      *
-     * @param target The guest address to start the mapping at.
-     * @param memory The memory to be mapped.
-     * @param size Size of the mapping.
-     * @param state MemoryState tag to attach to the VMA.
+     * @param target       The guest address to start the mapping at.
+     * @param memory       The memory to be mapped.
+     * @param size         Size of the mapping in bytes.
+     * @param backing_addr The base address of the range to back this mapping.
      */
     VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
 
@@ -124,7 +125,7 @@ private:
     /// Converts a VMAHandle to a mutable VMAIter.
     VMAIter StripIterConstness(const VMAHandle& iter);
 
-    /// Marks as the specfied VMA as allocated.
+    /// Marks the specified VMA as allocated.
     VMAIter Allocate(VMAIter vma);
 
     /**
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 291772186..0c4ea1494 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -37,9 +37,6 @@ public:
     /// Gets the size of the shader in guest memory, required for cache management
     virtual std::size_t GetSizeInBytes() const = 0;
 
-    /// Wriets any cached resources back to memory
-    virtual void Flush() = 0;
-
     /// Sets whether the cached object should be considered registered
     void SetIsRegistered(bool registered) {
         is_registered = registered;
@@ -147,8 +144,9 @@ protected:
 
         object->SetIsRegistered(false);
         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
+        const CacheAddr addr = object->GetCacheAddr();
         interval_cache.subtract({GetInterval(object), ObjectSet{object}});
-        map_cache.erase(object->GetCacheAddr());
+        map_cache.erase(addr);
     }
 
     /// Returns a ticks counter used for tracking when cached objects were last modified
@@ -158,6 +156,8 @@ protected:
         return ++modified_ticks;
     }
 
+    virtual void FlushObjectInner(const T& object) = 0;
+
     /// Flushes the specified object, updating appropriate cache state as needed
     void FlushObject(const T& object) {
         std::lock_guard lock{mutex};
@@ -165,7 +165,7 @@ protected:
         if (!object->IsDirty()) {
             return;
         }
-        object->Flush();
+        FlushObjectInner(object);
         object->MarkAsModified(false, *this);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index fc33aa433..f9247a40e 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -42,9 +42,6 @@ public:
         return alignment;
     }
 
-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
 private:
     VAddr cpu_addr{};
     std::size_t size{};
@@ -75,6 +72,9 @@ public:
 protected:
     void AlignBuffer(std::size_t alignment);
 
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
 private:
     OGLStreamBuffer stream_buffer;
 
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 196e6e278..2d467a240 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -46,7 +46,7 @@ public:
     /// Reloads the global region from guest memory
     void Reload(u32 size_);
 
-    void Flush() override;
+    void Flush();
 
 private:
     VAddr cpu_addr{};
@@ -65,6 +65,11 @@ public:
     GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
                                  Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
 
+protected:
+    void FlushObjectInner(const GlobalRegion& object) override {
+        object->Flush();
+    }
+
 private:
     GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
     GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index db73e746c..dbd8049f5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -261,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
             // MakeQuadArray always generates u32 indexes
             params.index_format = GL_UNSIGNED_INT;
             params.count = (regs.vertex_buffer.count / 4) * 6;
-            params.index_buffer_offset =
-                primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
+            params.index_buffer_offset = primitive_assembler.MakeQuadArray(
+                regs.vertex_buffer.first, regs.vertex_buffer.count);
         }
         return params;
     }
@@ -922,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
         viewport.y = viewport_rect.bottom;
         viewport.width = viewport_rect.GetWidth();
         viewport.height = viewport_rect.GetHeight();
-        viewport.depth_range_far = regs.viewports[i].depth_range_far;
-        viewport.depth_range_near = regs.viewports[i].depth_range_near;
+        viewport.depth_range_far = src.depth_range_far;
+        viewport.depth_range_near = src.depth_range_near;
     }
     state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
     state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
@@ -1135,7 +1135,9 @@ void RasterizerOpenGL::SyncTransformFeedback() {
 
 void RasterizerOpenGL::SyncPointState() {
     const auto& regs = system.GPU().Maxwell3D().regs;
-    state.point.size = regs.point_size;
+    // Clamp the point size to at least 1, since nouveau sometimes sets a point size of 0 (which is
+    // invalid in OpenGL).
+    state.point.size = std::max(1.0f, regs.point_size);
 }
 
 void RasterizerOpenGL::SyncPolygonOffset() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 5a25f5b37..a7681902e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -628,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
-void CachedSurface::LoadGLBuffer() {
+void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
     MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
-    gl_buffer.resize(params.max_mip_level);
+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+    if (gl_buffer.size() < params.max_mip_level)
+        gl_buffer.resize(params.max_mip_level);
     for (u32 i = 0; i < params.max_mip_level; i++)
         gl_buffer[i].resize(params.GetMipmapSizeGL(i));
     if (params.is_tiled) {
@@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() {
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
-void CachedSurface::FlushGLBuffer() {
+void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
     MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
 
     ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
 
+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
     // OpenGL temporary buffer needs to be big enough to store raw texture size
-    gl_buffer.resize(1);
     gl_buffer[0].resize(GetSizeInBytes());
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() {
     }
 }
 
-void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
-                                          GLuint draw_fb_handle) {
+void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+                                          GLuint read_fb_handle, GLuint draw_fb_handle) {
     const auto& rect{params.GetRect(mip_map)};
 
+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+
     // Load data from memory to the surface
     const auto x0 = static_cast<GLint>(rect.left);
     const auto y0 = static_cast<GLint>(rect.bottom);
@@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
                                 tuple.type, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::TextureCubemap: {
-            std::size_t start = buffer_offset;
             for (std::size_t face = 0; face < params.depth; ++face) {
                 glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
                                     static_cast<GLsizei>(rect.GetWidth()),
@@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() {
 }
 
 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
-void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
+                                    GLuint read_fb_handle, GLuint draw_fb_handle) {
     MICROPROFILE_SCOPE(OpenGL_TextureUL);
 
     for (u32 i = 0; i < params.max_mip_level; i++)
-        UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
+        UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
 }
 
 void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
@@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
 }
 
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
-    surface->LoadGLBuffer();
-    surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+    surface->LoadGLBuffer(temporal_memory);
+    surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
     surface->MarkAsModified(false, *this);
     surface->MarkForReload(false);
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index db280dbb3..6263ef3e7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -355,6 +355,12 @@ namespace OpenGL {
 
 class RasterizerOpenGL;
 
+// Holds large scratch buffers that are reused across surface loads and flushes,
+// instead of being allocated and freed on every operation.
+struct RasterizerTemporaryMemory {
+    std::vector<std::vector<u8>> gl_buffer;
+};
+
 class CachedSurface final : public RasterizerCacheObject {
 public:
     explicit CachedSurface(const SurfaceParams& params);
@@ -371,10 +377,6 @@ public:
         return memory_size;
     }
 
-    void Flush() override {
-        FlushGLBuffer();
-    }
-
     const OGLTexture& Texture() const {
         return texture;
     }
@@ -397,11 +399,12 @@ public:
     }
 
     // Read/Write data in Switch memory to/from gl_buffer
-    void LoadGLBuffer();
-    void FlushGLBuffer();
+    void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
+    void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
 
     // Upload data in gl_buffer to this surface's texture
-    void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
+    void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
+                         GLuint draw_fb_handle);
 
     void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                        Tegra::Texture::SwizzleSource swizzle_y,
@@ -429,13 +432,13 @@ public:
     }
 
 private:
-    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
+    void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+                               GLuint read_fb_handle, GLuint draw_fb_handle);
 
     void EnsureTextureDiscrepantView();
 
     OGLTexture texture;
     OGLTexture discrepant_view;
-    std::vector<std::vector<u8>> gl_buffer;
     SurfaceParams params{};
     GLenum gl_target{};
     GLenum gl_internal_format{};
@@ -473,6 +476,11 @@ public:
     void SignalPreDrawCall();
     void SignalPostDrawCall();
 
+protected:
+    void FlushObjectInner(const Surface& object) override {
+        object->FlushGLBuffer(temporal_memory);
+    }
+
 private:
     void LoadSurface(const Surface& surface);
     Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -519,6 +527,8 @@ private:
     std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
     Surface last_depth_buffer;
 
+    RasterizerTemporaryMemory temporal_memory;
+
     using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
     using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index b1c8f7c35..f700dc89a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -345,7 +345,7 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
 
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                      const Device& device)
-    : RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
+    : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}
 
 void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                                       const VideoCore::DiskResourceLoadCallback& callback) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a332087f8..31b979987 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -57,9 +57,6 @@ public:
         return shader_length;
     }
 
-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
     /// Gets the shader entries for the shader
     const GLShader::ShaderEntries& GetShaderEntries() const {
         return entries;
@@ -123,6 +120,10 @@ public:
     /// Gets the current specified shader stage program
     Shader GetStageProgram(Maxwell::ShaderProgram program);
 
+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const Shader& object) override {}
+
 private:
     std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
         const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index da925372c..4bff54a59 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -943,17 +943,6 @@ private:
         return {};
     }
 
-    std::string Composite(Operation operation) {
-        std::string value = "vec4(";
-        for (std::size_t i = 0; i < 4; ++i) {
-            value += Visit(operation[i]);
-            if (i < 3)
-                value += ", ";
-        }
-        value += ')';
-        return value;
-    }
-
     template <Type type>
     std::string Add(Operation operation) {
         return GenerateBinaryInfix(operation, "+", type, type, type);
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index ed7afc4a0..fba9c594a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -104,8 +104,9 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
     return true;
 }
 
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
-    : system{system}, precompiled_cache_virtual_file_offset{0} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+
+ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
 
 std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
 ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -243,7 +244,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
                 return {};
             }
 
-            const auto entry = LoadDecompiledEntry();
+            auto entry = LoadDecompiledEntry();
             if (!entry) {
                 return {};
             }
@@ -287,13 +288,13 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
         return {};
     }
 
-    std::vector<u8> code(code_size);
+    std::string code(code_size, '\0');
     if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
         return {};
     }
 
     ShaderDiskCacheDecompiled entry;
-    entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
+    entry.code = std::move(code);
 
     u32 const_buffers_count{};
     if (!LoadObjectFromPrecompiled(const_buffers_count)) {
@@ -303,12 +304,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
     for (u32 i = 0; i < const_buffers_count; ++i) {
         u32 max_offset{};
         u32 index{};
-        u8 is_indirect{};
+        bool is_indirect{};
         if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
             !LoadObjectFromPrecompiled(is_indirect)) {
             return {};
         }
-        entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
+        entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
     }
 
     u32 samplers_count{};
@@ -320,18 +321,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
         u64 offset{};
         u64 index{};
         u32 type{};
-        u8 is_array{};
-        u8 is_shadow{};
-        u8 is_bindless{};
+        bool is_array{};
+        bool is_shadow{};
+        bool is_bindless{};
         if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
             !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
             !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
             return {};
         }
-        entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
-                                            static_cast<std::size_t>(index),
-                                            static_cast<Tegra::Shader::TextureType>(type),
-                                            is_array != 0, is_shadow != 0, is_bindless != 0);
+        entry.entries.samplers.emplace_back(
+            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+            static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
     }
 
     u32 global_memory_count{};
@@ -342,21 +342,20 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
     for (u32 i = 0; i < global_memory_count; ++i) {
         u32 cbuf_index{};
         u32 cbuf_offset{};
-        u8 is_read{};
-        u8 is_written{};
+        bool is_read{};
+        bool is_written{};
         if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
             !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
             return {};
         }
-        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
-                                                         is_written != 0);
+        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
+                                                         is_written);
     }
 
     for (auto& clip_distance : entry.entries.clip_distances) {
-        u8 clip_distance_raw{};
-        if (!LoadObjectFromPrecompiled(clip_distance_raw))
+        if (!LoadObjectFromPrecompiled(clip_distance)) {
             return {};
-        clip_distance = clip_distance_raw != 0;
+        }
     }
 
     u64 shader_length{};
@@ -384,7 +383,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
     for (const auto& cbuf : entries.const_buffers) {
         if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
             !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
+            !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
             return false;
         }
     }
@@ -396,9 +395,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
         if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
             !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
             !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
+            !SaveObjectToPrecompiled(sampler.IsArray()) ||
+            !SaveObjectToPrecompiled(sampler.IsShadow()) ||
+            !SaveObjectToPrecompiled(sampler.IsBindless())) {
             return false;
         }
     }
@@ -409,14 +408,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
     for (const auto& gmem : entries.global_memory_entries) {
         if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
             !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
+            !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
             return false;
         }
     }
 
     for (const bool clip_distance : entries.clip_distances) {
-        if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
+        if (!SaveObjectToPrecompiled(clip_distance)) {
             return false;
         }
     }
@@ -475,7 +473,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
     ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
 
     auto& usages{it->second};
-    ASSERT(usages.find(usage) == usages.end());
+    if (usages.find(usage) != usages.end()) {
+        // Skip this variant since the shader is already stored.
+        return;
+    }
     usages.insert(usage);
 
     FileUtil::IOFile file = AppendTransferableFile();
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 0142b2e3b..2da0a4a23 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -70,14 +70,14 @@ namespace std {
 
 template <>
 struct hash<OpenGL::BaseBindings> {
-    std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+    std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
         return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
     }
 };
 
 template <>
 struct hash<OpenGL::ShaderDiskCacheUsage> {
-    std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
+    std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
         return static_cast<std::size_t>(usage.unique_identifier) ^
                std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
     }
@@ -162,6 +162,7 @@ struct ShaderDiskCacheDump {
 class ShaderDiskCacheOpenGL {
 public:
     explicit ShaderDiskCacheOpenGL(Core::System& system);
+    ~ShaderDiskCacheOpenGL();
 
     /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
     std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
@@ -259,20 +260,35 @@ private:
         return SaveArrayToPrecompiled(&object, 1);
     }
 
+    bool SaveObjectToPrecompiled(bool object) {
+        const u8 serialized = static_cast<u8>(object ? 1 : 0); // A bool is written as one u8
+        return SaveArrayToPrecompiled(&serialized, 1);
+    }
+
     template <typename T>
     bool LoadObjectFromPrecompiled(T& object) {
         return LoadArrayFromPrecompiled(&object, 1);
     }
 
-    // Copre system
+    /// Reads one u8 from the precompiled cache and stores whether it is non-zero in object.
+    bool LoadObjectFromPrecompiled(bool& object) {
+        u8 value{};
+        if (!LoadArrayFromPrecompiled(&value, 1)) {
+            return false;
+        }
+
+        object = value != 0;
+        return true;
+    }
+
+    // Core system
     Core::System& system;
     // Stored transferable shaders
     std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
-    // Stores whole precompiled cache which will be read from or saved to the precompiled chache
-    // file
+    // Stores whole precompiled cache which will be read from/saved to the precompiled cache file
     FileSys::VectorVfsFile precompiled_cache_virtual_file;
     // Stores the current offset of the precompiled cache file for IO purposes
-    std::size_t precompiled_cache_virtual_file_offset;
+    std::size_t precompiled_cache_virtual_file_offset = 0;
 
     // The cache has been loaded at boot
     bool tried_to_load{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 6abf948f8..7ab0b4553 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -33,14 +33,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
 };
 
 )";
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
 
     out += program.first;
 
     if (setup.IsDualProgram()) {
-        ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
+        const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
         ProgramResult program_b =
             Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
 
@@ -76,7 +76,7 @@ void main() {
     }
 })";
 
-    return {out, program.second};
+    return {std::move(out), std::move(program.second)};
 }
 
 ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
@@ -97,7 +97,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
 };
 
 )";
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
     out += program.first;
@@ -107,7 +107,7 @@ void main() {
     execute_geometry();
 };)";
 
-    return {out, program.second};
+    return {std::move(out), std::move(program.second)};
 }
 
 ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
@@ -160,7 +160,7 @@ bool AlphaFunc(in float value) {
 }
 
 )";
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
 
@@ -172,7 +172,7 @@ void main() {
 }
 
 )";
-    return {out, program.second};
+    return {std::move(out), std::move(program.second)};
 }
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 95b773135..ed7b5cff0 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -126,6 +126,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
         return GL_TRIANGLES;
     case Maxwell::PrimitiveTopology::TriangleStrip:
         return GL_TRIANGLE_STRIP;
+    case Maxwell::PrimitiveTopology::TriangleFan:
+        return GL_TRIANGLE_FAN;
     default:
         LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
         UNREACHABLE();
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 08b786aad..3edf460df 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -49,9 +49,6 @@ public:
         return alignment;
     }
 
-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
 private:
     VAddr cpu_addr{};
     std::size_t size{};
@@ -87,6 +84,10 @@ public:
         return buffer_handle;
     }
 
+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
 private:
     void AlignBuffer(std::size_t alignment);
 
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 850085f35..b61a6d170 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -315,7 +315,6 @@ private:
         constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
                                                                          "overflow"};
         for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
-            const auto flag_code = static_cast<InternalFlag>(flag);
             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
             internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
         }
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 819cc6131..5b033126d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -540,8 +540,6 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                             bool is_array, bool is_aoffi) {
     const std::size_t coord_count = GetCoordCount(texture_type);
-    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
-    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
 
     // If enabled arrays index is always stored in the gpr8 field
     const u64 array_register = instr.gpr8.Value();
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 691d095c8..153ad1fd0 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -21,6 +21,13 @@ using Tegra::Shader::PredCondition;
 using Tegra::Shader::PredOperation;
 using Tegra::Shader::Register;
 
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
+    : program_code{program_code}, main_offset{main_offset} {
+    Decode(); // Decoding runs eagerly here, after both members above have been initialized.
+}
+
+ShaderIR::~ShaderIR() = default;
+
 Node ShaderIR::StoreNode(NodeData&& node_data) {
     auto store = std::make_unique<NodeData>(node_data);
     const Node node = store.get();
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 7e54f7e74..0bf124252 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -328,40 +328,31 @@ struct MetaTexture {
     u32 element{};
 };
 
-inline constexpr MetaArithmetic PRECISE = {true};
-inline constexpr MetaArithmetic NO_PRECISE = {false};
+constexpr MetaArithmetic PRECISE = {true};
+constexpr MetaArithmetic NO_PRECISE = {false};
 
 using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
 
 /// Holds any kind of operation that can be done in the IR
 class OperationNode final {
 public:
-    template <typename... T>
-    explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}
+    explicit OperationNode(OperationCode code) : code{code} {}
 
-    template <typename... T>
-    explicit constexpr OperationNode(OperationCode code, Meta&& meta)
-        : code{code}, meta{std::move(meta)} {}
+    explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {}
 
     template <typename... T>
-    explicit constexpr OperationNode(OperationCode code, const T*... operands)
+    explicit OperationNode(OperationCode code, const T*... operands)
         : OperationNode(code, {}, operands...) {}
 
     template <typename... T>
-    explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
-        : code{code}, meta{std::move(meta)} {
-
-        auto operands_list = {operands_...};
-        for (auto& operand : operands_list) {
-            operands.push_back(operand);
-        }
-    }
+    explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
+        : code{code}, meta{std::move(meta)}, operands{operands_...} {}
 
     explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
         : code{code}, meta{meta}, operands{std::move(operands)} {}
 
     explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
-        : code{code}, meta{}, operands{std::move(operands)} {}
+        : code{code}, operands{std::move(operands)} {}
 
     OperationCode GetCode() const {
         return code;
@@ -572,11 +563,8 @@ private:
 
 class ShaderIR final {
 public:
-    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
-        : program_code{program_code}, main_offset{main_offset} {
-
-        Decode();
-    }
+    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
+    ~ShaderIR();
 
     const std::map<u32, NodeBlock>& GetBasicBlocks() const {
         return basic_blocks;
@@ -823,11 +811,12 @@ private:
     void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
                               Node op_c, Node imm_lut, bool sets_cc);
 
-    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
+    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
-    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
 
-    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
+    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
+                                       s64 cursor) const;
 
     std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
                                                                      Node addr_register,
@@ -844,12 +833,10 @@ private:
         return StoreNode(OperationNode(code, std::move(meta), operands...));
     }
 
-    template <typename... T>
     Node Operation(OperationCode code, std::vector<Node>&& operands) {
         return StoreNode(OperationNode(code, std::move(operands)));
     }
 
-    template <typename... T>
     Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
         return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
     }
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 4505667ff..19ede1eb9 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -17,22 +17,24 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
     for (; cursor >= 0; --cursor) {
         const Node node = code.at(cursor);
         if (const auto operation = std::get_if<OperationNode>(node)) {
-            if (operation->GetCode() == operation_code)
+            if (operation->GetCode() == operation_code) {
                 return {node, cursor};
+            }
         }
         if (const auto conditional = std::get_if<ConditionalNode>(node)) {
             const auto& conditional_code = conditional->GetCode();
             const auto [found, internal_cursor] = FindOperation(
                 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
-            if (found)
+            if (found) {
                 return {found, cursor};
+            }
         }
     }
     return {};
 }
 } // namespace
 
-Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
+Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
     if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
         // Cbuf found, but it has to be immediate
         return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -65,7 +67,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
     return nullptr;
 }
 
-std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
     // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
     // that it uses as operand
     const auto [found, found_cursor] =
@@ -80,7 +82,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
 }
 
 std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
-                                             s64 cursor) {
+                                             s64 cursor) const {
     for (; cursor >= 0; --cursor) {
         const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
         if (!found_node) {
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index b508d64e9..a9b8f69af 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -25,8 +25,8 @@
 
 class InputBitStream {
 public:
-    explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
-        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+    explicit InputBitStream(const unsigned char* ptr, int start_offset = 0)
+        : m_CurByte(ptr), m_NextBit(start_offset % 8) {}
 
     ~InputBitStream() = default;
 
@@ -55,12 +55,9 @@ public:
     }
 
 private:
-    const int m_NumBits;
     const unsigned char* m_CurByte;
     int m_NextBit = 0;
     int m_BitsRead = 0;
-
-    bool done = false;
 };
 
 class OutputBitStream {
@@ -114,7 +111,6 @@ private:
     const int m_NumBits;
     unsigned char* m_CurByte;
     int m_NextBit = 0;
-    int m_BitsRead = 0;
 
     bool done = false;
 };
@@ -1616,6 +1612,7 @@ namespace Tegra::Texture::ASTC {
 std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                 uint32_t depth, uint32_t block_width, uint32_t block_height) {
     uint32_t blockIdx = 0;
+    std::size_t depth_offset = 0;
     std::vector<uint8_t> outData(height * width * depth * 4);
     for (uint32_t k = 0; k < depth; k++) {
         for (uint32_t j = 0; j < height; j += block_height) {
@@ -1630,7 +1627,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
                 uint32_t decompWidth = std::min(block_width, width - i);
                 uint32_t decompHeight = std::min(block_height, height - j);
 
-                uint8_t* outRow = outData.data() + (j * width + i) * 4;
+                uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
                 for (uint32_t jj = 0; jj < decompHeight; jj++) {
                     memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
                 }
@@ -1638,6 +1635,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
                 blockIdx++;
             }
         }
+        depth_offset += height * width * 4;
     }
 
     return outData;
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 5138bd9a3..7e883991a 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -82,8 +82,6 @@ add_executable(yuzu
     util/limitable_input_dialog.h
     util/sequence_dialog/sequence_dialog.cpp
     util/sequence_dialog/sequence_dialog.h
-    util/spinbox.cpp
-    util/spinbox.h
     util/util.cpp
     util/util.h
     compatdb.cpp
diff --git a/src/yuzu/about_dialog.cpp b/src/yuzu/about_dialog.cpp
index 3efa65a38..d39b3f07a 100644
--- a/src/yuzu/about_dialog.cpp
+++ b/src/yuzu/about_dialog.cpp
@@ -9,10 +9,10 @@
 
 AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) {
     ui->setupUi(this);
-    ui->labelLogo->setPixmap(QIcon::fromTheme("yuzu").pixmap(200));
-    ui->labelBuildInfo->setText(
-        ui->labelBuildInfo->text().arg(Common::g_build_fullname, Common::g_scm_branch,
-                                       Common::g_scm_desc, QString(Common::g_build_date).left(10)));
+    ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200));
+    ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg(
+        QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch),
+        QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10)));
 }
 
 AboutDialog::~AboutDialog() = default;
diff --git a/src/yuzu/applets/error.cpp b/src/yuzu/applets/error.cpp
index 1fb2fe277..106dde9e2 100644
--- a/src/yuzu/applets/error.cpp
+++ b/src/yuzu/applets/error.cpp
@@ -54,6 +54,6 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te
 
 void QtErrorDisplay::MainWindowFinishedError() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     callback();
 }
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index 743b24d76..7fbc9deeb 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -84,10 +84,10 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
     tree_view->setContextMenuPolicy(Qt::NoContextMenu);
 
     item_model->insertColumns(0, 1);
-    item_model->setHeaderData(0, Qt::Horizontal, "Users");
+    item_model->setHeaderData(0, Qt::Horizontal, tr("Users"));
 
     // We must register all custom types with the Qt Automoc system so that we are able to use it
-    // with signals/slots. In this case, QList falls under the umbrells of custom types.
+    // with signals/slots. In this case, QList falls under the umbrella of custom types.
     qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");
 
     layout->setContentsMargins(0, 0, 0, 0);
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 5c98636c5..810954b36 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -379,6 +379,7 @@ void GRenderWindow::InitRenderTarget() {
     fmt.setVersion(4, 3);
     if (Settings::values.use_compatibility_profile) {
         fmt.setProfile(QSurfaceFormat::CompatibilityProfile);
+        fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
     } else {
         fmt.setProfile(QSurfaceFormat::CoreProfile);
     }
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index c8b0a5ec0..5477f050c 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -58,7 +58,7 @@ void CompatDB::Submit() {
 
         button(NextButton)->setEnabled(false);
         button(NextButton)->setText(tr("Submitting"));
-        button(QWizard::CancelButton)->setVisible(false);
+        button(CancelButton)->setVisible(false);
 
         testcase_watcher.setFuture(QtConcurrent::run(
             [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
@@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() {
                               tr("An error occured while sending the Testcase"));
         button(NextButton)->setEnabled(true);
         button(NextButton)->setText(tr("Next"));
-        button(QWizard::CancelButton)->setVisible(true);
+        button(CancelButton)->setVisible(true);
     } else {
         next();
         // older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a
         // workaround
-        button(QWizard::CancelButton)->setVisible(false);
+        button(CancelButton)->setVisible(false);
     }
 }
 
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index a5218b051..32c05b797 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
     ui->hotkeysTab->Populate(registry);
     this->setConfiguration();
     this->PopulateSelectionList();
+
+    setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
+
     connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
             &ConfigureDialog::UpdateVisibleTabs);
+
     adjustSize();
     ui->selectorList->setCurrentRow(0);
 
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index c299c0b5b..08ea41b0f 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -69,16 +69,20 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
 ConfigureGraphics::~ConfigureGraphics() = default;
 
 void ConfigureGraphics::setConfiguration() {
+    const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
+
     ui->resolution_factor_combobox->setCurrentIndex(
         static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
     ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
     ui->frame_limit->setValue(Settings::values.frame_limit);
+    ui->use_compatibility_profile->setEnabled(runtime_lock);
     ui->use_compatibility_profile->setChecked(Settings::values.use_compatibility_profile);
+    ui->use_disk_shader_cache->setEnabled(runtime_lock);
     ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
     ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
-    ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
     ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
-    ui->force_30fps_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->force_30fps_mode->setEnabled(runtime_lock);
     ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
     UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
                                                  Settings::values.bg_blue));
diff --git a/src/yuzu/debugger/graphics/graphics_breakpoints.cpp b/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
index 67ed0ba6d..1c80082a4 100644
--- a/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
+++ b/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
@@ -135,7 +135,7 @@ GraphicsBreakPointsWidget::GraphicsBreakPointsWidget(
     std::shared_ptr<Tegra::DebugContext> debug_context, QWidget* parent)
     : QDockWidget(tr("Maxwell Breakpoints"), parent), Tegra::DebugContext::BreakPointObserver(
                                                           debug_context) {
-    setObjectName("TegraBreakPointsWidget");
+    setObjectName(QStringLiteral("TegraBreakPointsWidget"));
 
     status_text = new QLabel(tr("Emulation running"));
     resume_button = new QPushButton(tr("Resume"));
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index 86e03e46d..f594ef076 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -47,7 +47,7 @@ private:
 #endif
 
 MicroProfileDialog::MicroProfileDialog(QWidget* parent) : QWidget(parent, Qt::Dialog) {
-    setObjectName("MicroProfile");
+    setObjectName(QStringLiteral("MicroProfile"));
     setWindowTitle(tr("MicroProfile"));
     resize(1000, 600);
     // Remove the "?" button from the titlebar and enable the maximize button
@@ -191,7 +191,7 @@ void MicroProfileDrawText(int x, int y, u32 hex_color, const char* text, u32 tex
     for (u32 i = 0; i < text_length; ++i) {
         // Position the text baseline 1 pixel above the bottom of the text cell, this gives nice
         // vertical alignment of text for a wide range of tested fonts.
-        mp_painter->drawText(x, y + MICROPROFILE_TEXT_HEIGHT - 2, QChar(text[i]));
+        mp_painter->drawText(x, y + MICROPROFILE_TEXT_HEIGHT - 2, QString{QLatin1Char{text[i]}});
         x += MICROPROFILE_TEXT_WIDTH + 1;
     }
 }
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 85b095688..cd8180f8b 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -91,19 +91,19 @@ WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTa
 WaitTreeMutexInfo::~WaitTreeMutexInfo() = default;
 
 QString WaitTreeMutexInfo::GetText() const {
-    return tr("waiting for mutex 0x%1").arg(mutex_address, 16, 16, QLatin1Char('0'));
+    return tr("waiting for mutex 0x%1").arg(mutex_address, 16, 16, QLatin1Char{'0'});
 }
 
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeMutexInfo::GetChildren() const {
-    std::vector<std::unique_ptr<WaitTreeItem>> list;
-
-    bool has_waiters = (mutex_value & Kernel::Mutex::MutexHasWaitersFlag) != 0;
+    const bool has_waiters = (mutex_value & Kernel::Mutex::MutexHasWaitersFlag) != 0;
 
+    std::vector<std::unique_ptr<WaitTreeItem>> list;
     list.push_back(std::make_unique<WaitTreeText>(tr("has waiters: %1").arg(has_waiters)));
     list.push_back(std::make_unique<WaitTreeText>(
-        tr("owner handle: 0x%1").arg(owner_handle, 8, 16, QLatin1Char('0'))));
-    if (owner != nullptr)
+        tr("owner handle: 0x%1").arg(owner_handle, 8, 16, QLatin1Char{'0'})));
+    if (owner != nullptr) {
         list.push_back(std::make_unique<WaitTreeThread>(*owner));
+    }
     return list;
 }
 
@@ -121,11 +121,14 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons
     u64 base_pointer = thread.GetContext().cpu_registers[BaseRegister];
 
     while (base_pointer != 0) {
-        u64 lr = Memory::Read64(base_pointer + sizeof(u64));
-        if (lr == 0)
+        const u64 lr = Memory::Read64(base_pointer + sizeof(u64));
+        if (lr == 0) {
             break;
-        list.push_back(
-            std::make_unique<WaitTreeText>(tr("0x%1").arg(lr - sizeof(u32), 16, 16, QChar('0'))));
+        }
+
+        list.push_back(std::make_unique<WaitTreeText>(
+            tr("0x%1").arg(lr - sizeof(u32), 16, 16, QLatin1Char{'0'})));
+
         base_pointer = Memory::Read64(base_pointer);
     }
 
@@ -174,10 +177,10 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeWaitObject::GetChildren() con
 
 QString WaitTreeWaitObject::GetResetTypeQString(Kernel::ResetType reset_type) {
     switch (reset_type) {
-    case Kernel::ResetType::OneShot:
-        return tr("one shot");
-    case Kernel::ResetType::Sticky:
-        return tr("sticky");
+    case Kernel::ResetType::Automatic:
+        return tr("automatic reset");
+    case Kernel::ResetType::Manual:
+        return tr("manual reset");
     }
     UNREACHABLE();
     return {};
@@ -249,9 +252,9 @@ QString WaitTreeThread::GetText() const {
 
     const auto& context = thread.GetContext();
     const QString pc_info = tr(" PC = 0x%1 LR = 0x%2")
-                                .arg(context.pc, 8, 16, QLatin1Char('0'))
-                                .arg(context.cpu_registers[30], 8, 16, QLatin1Char('0'));
-    return WaitTreeWaitObject::GetText() + pc_info + " (" + status + ") ";
+                                .arg(context.pc, 8, 16, QLatin1Char{'0'})
+                                .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'});
+    return QStringLiteral("%1%2 (%3) ").arg(WaitTreeWaitObject::GetText(), pc_info, status);
 }
 
 QColor WaitTreeThread::GetColor() const {
@@ -424,7 +427,7 @@ void WaitTreeModel::InitItems() {
 }
 
 WaitTreeWidget::WaitTreeWidget(QWidget* parent) : QDockWidget(tr("Wait Tree"), parent) {
-    setObjectName("WaitTreeWidget");
+    setObjectName(QStringLiteral("WaitTreeWidget"));
     view = new QTreeView(this);
     view->setHeaderHidden(true);
     setWidget(view);
diff --git a/src/yuzu/hotkeys.h b/src/yuzu/hotkeys.h
index 4f526dc7e..248fadaf3 100644
--- a/src/yuzu/hotkeys.h
+++ b/src/yuzu/hotkeys.h
@@ -67,8 +67,6 @@ public:
 
 private:
     struct Hotkey {
-        Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
-
         QKeySequence keyseq;
         QShortcut* shortcut = nullptr;
         Qt::ShortcutContext context = Qt::WindowShortcut;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index e33e3aaaf..a59abf6e8 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -198,11 +198,11 @@ GMainWindow::GMainWindow()
 
     ConnectMenuEvents();
     ConnectWidgetEvents();
+
     LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch,
              Common::g_scm_desc);
+    UpdateWindowTitle();
 
-    setWindowTitle(QString("yuzu %1| %2-%3")
-                       .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc));
     show();
 
     Core::System::GetInstance().SetContentProvider(
@@ -936,9 +936,7 @@ void GMainWindow::BootGame(const QString& filename) {
             title_name = FileUtil::GetFilename(filename.toStdString());
     }
 
-    setWindowTitle(QString("yuzu %1| %4 | %2-%3")
-                       .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc,
-                            QString::fromStdString(title_name)));
+    UpdateWindowTitle(QString::fromStdString(title_name));
 
     loading_screen->Prepare(Core::System::GetInstance().GetAppLoader());
     loading_screen->show();
@@ -979,8 +977,8 @@ void GMainWindow::ShutdownGame() {
     loading_screen->Clear();
     game_list->show();
     game_list->setFilterFocus();
-    setWindowTitle(QString("yuzu %1| %2-%3")
-                       .arg(Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc));
+
+    UpdateWindowTitle();
 
     // Disable status bar updates
     status_bar_update_timer.stop();
@@ -1767,6 +1765,19 @@ void GMainWindow::OnCaptureScreenshot() {
     OnStartGame();
 }
 
+// Sets the window title to the build/SCM info, inserting title_name when it is non-empty.
+void GMainWindow::UpdateWindowTitle(const QString& title_name) {
+    const QString build = QString::fromUtf8(Common::g_build_fullname);
+    const QString branch = QString::fromUtf8(Common::g_scm_branch);
+    const QString desc = QString::fromUtf8(Common::g_scm_desc);
+    if (title_name.isEmpty()) {
+        setWindowTitle(QStringLiteral("yuzu %1| %2-%3").arg(build, branch, desc));
+        return;
+    }
+    setWindowTitle(
+        QStringLiteral("yuzu %1| %4 | %2-%3").arg(build, branch, desc, title_name));
+}
+
 void GMainWindow::UpdateStatusBar() {
     if (emu_thread == nullptr) {
         status_bar_update_timer.stop();
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index fb2a193cb..7bf82e665 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -209,6 +209,7 @@ private slots:
 
 private:
     std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
+    void UpdateWindowTitle(const QString& title_name = {});
     void UpdateStatusBar();
 
     Ui::MainWindow ui;
diff --git a/src/yuzu/util/spinbox.cpp b/src/yuzu/util/spinbox.cpp
deleted file mode 100644
index 14ef1e884..000000000
--- a/src/yuzu/util/spinbox.cpp
+++ /dev/null
@@ -1,278 +0,0 @@
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-// Copyright 2014 Tony Wasserka
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above copyright
-//       notice, this list of conditions and the following disclaimer in the
-//       documentation and/or other materials provided with the distribution.
-//     * Neither the name of the owner nor the names of its contributors may
-//       be used to endorse or promote products derived from this software
-//       without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <cstdlib>
-#include <QLineEdit>
-#include <QRegExpValidator>
-#include "common/assert.h"
-#include "yuzu/util/spinbox.h"
-
-CSpinBox::CSpinBox(QWidget* parent)
-    : QAbstractSpinBox(parent), min_value(-100), max_value(100), value(0), base(10), num_digits(0) {
-    // TODO: Might be nice to not immediately call the slot.
-    //       Think of an address that is being replaced by a different one, in which case a lot
-    //       invalid intermediate addresses would be read from during editing.
-    connect(lineEdit(), &QLineEdit::textEdited, this, &CSpinBox::OnEditingFinished);
-
-    UpdateText();
-}
-
-void CSpinBox::SetValue(qint64 val) {
-    auto old_value = value;
-    value = std::max(std::min(val, max_value), min_value);
-
-    if (old_value != value) {
-        UpdateText();
-        emit ValueChanged(value);
-    }
-}
-
-void CSpinBox::SetRange(qint64 min, qint64 max) {
-    min_value = min;
-    max_value = max;
-
-    SetValue(value);
-    UpdateText();
-}
-
-void CSpinBox::stepBy(int steps) {
-    auto new_value = value;
-    // Scale number of steps by the currently selected digit
-    // TODO: Move this code elsewhere and enable it.
-    // TODO: Support for num_digits==0, too
-    // TODO: Support base!=16, too
-    // TODO: Make the cursor not jump back to the end of the line...
-    /*if (base == 16 && num_digits > 0) {
-        int digit = num_digits - (lineEdit()->cursorPosition() - prefix.length()) - 1;
-        digit = std::max(0, std::min(digit, num_digits - 1));
-        steps <<= digit * 4;
-    }*/
-
-    // Increment "new_value" by "steps", and perform annoying overflow checks, too.
-    if (steps < 0 && new_value + steps > new_value) {
-        new_value = std::numeric_limits<qint64>::min();
-    } else if (steps > 0 && new_value + steps < new_value) {
-        new_value = std::numeric_limits<qint64>::max();
-    } else {
-        new_value += steps;
-    }
-
-    SetValue(new_value);
-    UpdateText();
-}
-
-QAbstractSpinBox::StepEnabled CSpinBox::stepEnabled() const {
-    StepEnabled ret = StepNone;
-
-    if (value > min_value)
-        ret |= StepDownEnabled;
-
-    if (value < max_value)
-        ret |= StepUpEnabled;
-
-    return ret;
-}
-
-void CSpinBox::SetBase(int base) {
-    this->base = base;
-
-    UpdateText();
-}
-
-void CSpinBox::SetNumDigits(int num_digits) {
-    this->num_digits = num_digits;
-
-    UpdateText();
-}
-
-void CSpinBox::SetPrefix(const QString& prefix) {
-    this->prefix = prefix;
-
-    UpdateText();
-}
-
-void CSpinBox::SetSuffix(const QString& suffix) {
-    this->suffix = suffix;
-
-    UpdateText();
-}
-
-static QString StringToInputMask(const QString& input) {
-    QString mask = input;
-
-    // ... replace any special characters by their escaped counterparts ...
-    mask.replace("\\", "\\\\");
-    mask.replace("A", "\\A");
-    mask.replace("a", "\\a");
-    mask.replace("N", "\\N");
-    mask.replace("n", "\\n");
-    mask.replace("X", "\\X");
-    mask.replace("x", "\\x");
-    mask.replace("9", "\\9");
-    mask.replace("0", "\\0");
-    mask.replace("D", "\\D");
-    mask.replace("d", "\\d");
-    mask.replace("#", "\\#");
-    mask.replace("H", "\\H");
-    mask.replace("h", "\\h");
-    mask.replace("B", "\\B");
-    mask.replace("b", "\\b");
-    mask.replace(">", "\\>");
-    mask.replace("<", "\\<");
-    mask.replace("!", "\\!");
-
-    return mask;
-}
-
-void CSpinBox::UpdateText() {
-    // If a fixed number of digits is used, we put the line edit in insertion mode by setting an
-    // input mask.
-    QString mask;
-    if (num_digits != 0) {
-        mask += StringToInputMask(prefix);
-
-        // For base 10 and negative range, demand a single sign character
-        if (HasSign())
-            mask += "X"; // identified as "-" or "+" in the validator
-
-        // Uppercase digits greater than 9.
-        mask += ">";
-
-        // Match num_digits digits
-        // Digits irrelevant to the chosen number base are filtered in the validator
-        mask += QString("H").repeated(std::max(num_digits, 1));
-
-        // Switch off case conversion
-        mask += "!";
-
-        mask += StringToInputMask(suffix);
-    }
-    lineEdit()->setInputMask(mask);
-
-    // Set new text without changing the cursor position. This will cause the cursor to briefly
-    // appear at the end of the line and then to jump back to its original position. That's
-    // a bit ugly, but better than having setText() move the cursor permanently all the time.
-    int cursor_position = lineEdit()->cursorPosition();
-    lineEdit()->setText(TextFromValue());
-    lineEdit()->setCursorPosition(cursor_position);
-}
-
-QString CSpinBox::TextFromValue() {
-    return prefix + QString(HasSign() ? ((value < 0) ? "-" : "+") : "") +
-           QString("%1").arg(std::abs(value), num_digits, base, QLatin1Char('0')).toUpper() +
-           suffix;
-}
-
-qint64 CSpinBox::ValueFromText() {
-    unsigned strpos = prefix.length();
-
-    QString num_string = text().mid(strpos, text().length() - strpos - suffix.length());
-    return num_string.toLongLong(nullptr, base);
-}
-
-bool CSpinBox::HasSign() const {
-    return base == 10 && min_value < 0;
-}
-
-void CSpinBox::OnEditingFinished() {
-    // Only update for valid input
-    QString input = lineEdit()->text();
-    int pos = 0;
-    if (QValidator::Acceptable == validate(input, pos))
-        SetValue(ValueFromText());
-}
-
-QValidator::State CSpinBox::validate(QString& input, int& pos) const {
-    if (!prefix.isEmpty() && input.left(prefix.length()) != prefix)
-        return QValidator::Invalid;
-
-    int strpos = prefix.length();
-
-    // Empty "numbers" allowed as intermediate values
-    if (strpos >= input.length() - HasSign() - suffix.length())
-        return QValidator::Intermediate;
-
-    DEBUG_ASSERT(base <= 10 || base == 16);
-    QString regexp;
-
-    // Demand sign character for negative ranges
-    if (HasSign())
-        regexp += "[+\\-]";
-
-    // Match digits corresponding to the chosen number base.
-    regexp += QString("[0-%1").arg(std::min(base, 9));
-    if (base == 16) {
-        regexp += "a-fA-F";
-    }
-    regexp += "]";
-
-    // Specify number of digits
-    if (num_digits > 0) {
-        regexp += QString("{%1}").arg(num_digits);
-    } else {
-        regexp += "+";
-    }
-
-    // Match string
-    QRegExp num_regexp(regexp);
-    int num_pos = strpos;
-    QString sub_input = input.mid(strpos, input.length() - strpos - suffix.length());
-
-    if (!num_regexp.exactMatch(sub_input) && num_regexp.matchedLength() == 0)
-        return QValidator::Invalid;
-
-    sub_input = sub_input.left(num_regexp.matchedLength());
-    bool ok;
-    qint64 val = sub_input.toLongLong(&ok, base);
-
-    if (!ok)
-        return QValidator::Invalid;
-
-    // Outside boundaries => don't accept
-    if (val < min_value || val > max_value)
-        return QValidator::Invalid;
-
-    // Make sure we are actually at the end of this string...
-    strpos += num_regexp.matchedLength();
-
-    if (!suffix.isEmpty() && input.mid(strpos) != suffix) {
-        return QValidator::Invalid;
-    } else {
-        strpos += suffix.length();
-    }
-
-    if (strpos != input.length())
-        return QValidator::Invalid;
-
-    // At this point we can say for sure that the input is fine. Let's fix it up a bit though
-    input.replace(num_pos, sub_input.length(), sub_input.toUpper());
-
-    return QValidator::Acceptable;
-}
diff --git a/src/yuzu/util/spinbox.h b/src/yuzu/util/spinbox.h
deleted file mode 100644
index 2fa1db3a4..000000000
--- a/src/yuzu/util/spinbox.h
+++ /dev/null
@@ -1,86 +0,0 @@
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-// Copyright 2014 Tony Wasserka
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above copyright
-//       notice, this list of conditions and the following disclaimer in the
-//       documentation and/or other materials provided with the distribution.
-//     * Neither the name of the owner nor the names of its contributors may
-//       be used to endorse or promote products derived from this software
-//       without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#pragma once
-
-#include <QAbstractSpinBox>
-#include <QtGlobal>
-
-class QVariant;
-
-/**
- * A custom spin box widget with enhanced functionality over Qt's QSpinBox
- */
-class CSpinBox : public QAbstractSpinBox {
-    Q_OBJECT
-
-public:
-    explicit CSpinBox(QWidget* parent = nullptr);
-
-    void stepBy(int steps) override;
-    StepEnabled stepEnabled() const override;
-
-    void SetValue(qint64 val);
-
-    void SetRange(qint64 min, qint64 max);
-
-    void SetBase(int base);
-
-    void SetPrefix(const QString& prefix);
-    void SetSuffix(const QString& suffix);
-
-    void SetNumDigits(int num_digits);
-
-    QValidator::State validate(QString& input, int& pos) const override;
-
-signals:
-    void ValueChanged(qint64 val);
-
-private slots:
-    void OnEditingFinished();
-
-private:
-    void UpdateText();
-
-    bool HasSign() const;
-
-    QString TextFromValue();
-    qint64 ValueFromText();
-
-    qint64 min_value, max_value;
-
-    qint64 value;
-
-    QString prefix, suffix;
-
-    int base;
-
-    int num_digits;
-};
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 68a176032..8f104062d 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -176,9 +176,13 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
 
     SDL_SetMainReady();
 
+    const SDL_GLprofile profile = Settings::values.use_compatibility_profile
+                                      ? SDL_GL_CONTEXT_PROFILE_COMPATIBILITY
+                                      : SDL_GL_CONTEXT_PROFILE_CORE;
+
     SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
     SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
-    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, profile);
     SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
     SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8);
     SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index a1d7879b1..d3734927b 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -222,6 +222,7 @@ int main(int argc, char** argv) {
 
     system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
 
+    emu_window->MakeCurrent();
     system.Renderer().Rasterizer().LoadDiskResources();
 
     while (emu_window->IsOpen()) {