238 files changed, 6329 insertions, 2776 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f69d00a2b..6c99dd5e2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,18 +1,79 @@
 # Enable modules to include each other's files
 include_directories(.)
 
+# CMake seems to only define _DEBUG on Windows
+set_property(DIRECTORY APPEND PROPERTY
+    COMPILE_DEFINITIONS $<$<CONFIG:Debug>:_DEBUG> $<$<NOT:$<CONFIG:Debug>>:NDEBUG>)
+
+# Set compilation flags
+if (MSVC)
+    set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
+
+    # Silence "deprecation" warnings
+    add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS)
+
+    # Avoid windows.h junk
+    add_definitions(-DNOMINMAX)
+
+    # Stop windows.h from including some usually unused headers like winsock.h, since this might cause some redefinition errors.
+    add_definitions(-DWIN32_LEAN_AND_MEAN)
+
+    # /W3 - Level 3 warnings
+    # /MP - Multi-threaded compilation
+    # /Zi - Output debugging information
+    # /Zo - enhanced debug info for optimized builds
+    # /permissive- - enables stricter C++ standards conformance checks
+    # /EHsc - C++-only exception handling semantics
+    # /Zc:throwingNew - let codegen assume `operator new` will never return null
+    # /Zc:inline - let codegen omit inline functions in object files
+    add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
+
+    # /GS- - No stack buffer overflow checks
+    add_compile_options("$<$<CONFIG:Release>:/GS->")
+
+    set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
+    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
+else()
+    add_compile_options("-Wno-attributes")
+
+    if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
+        add_compile_options("-stdlib=libc++")
+    endif()
+
+    # Set file offset size to 64 bits.
+    #
+    # On modern Unixes, this is typically already the case. The lone exception is
+    # glibc, which may default to 32 bits. glibc allows this to be configured
+    # by setting _FILE_OFFSET_BITS.
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR MINGW)
+        add_definitions(-D_FILE_OFFSET_BITS=64)
+    endif()
+
+    if (MINGW)
+        add_definitions(-DMINGW_HAS_SECURE_API)
+
+        if (MINGW_STATIC_BUILD)
+            add_definitions(-DQT_STATICPLUGIN)
+            add_compile_options("-static")
+        endif()
+    endif()
+endif()
+
 add_subdirectory(common)
 add_subdirectory(core)
 add_subdirectory(audio_core)
 add_subdirectory(video_core)
 add_subdirectory(input_common)
 add_subdirectory(tests)
+
 if (ENABLE_SDL2)
     add_subdirectory(yuzu_cmd)
 endif()
+
 if (ENABLE_QT)
     add_subdirectory(yuzu)
 endif()
+
 if (ENABLE_WEB_SERVICE)
     add_subdirectory(web_service)
 endif()
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4b66a6786..22a3f8c84 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -38,7 +38,7 @@ Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format fo
       sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
 
     release_event = core_timing.RegisterEvent(
-        name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
+        name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(); });
 }
 
 void Stream::Play() {
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 3d30f0e3e..1e8e1b215 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -91,11 +91,18 @@ add_library(common STATIC
     logging/log.h
     logging/text_formatter.cpp
     logging/text_formatter.h
+    lz4_compression.cpp
+    lz4_compression.h
     math_util.h
+    memory_hook.cpp
+    memory_hook.h
     microprofile.cpp
     microprofile.h
     microprofileui.h
     misc.cpp
+    multi_level_queue.h
+    page_table.cpp
+    page_table.h
     param_package.cpp
     param_package.h
     quaternion.h
@@ -114,8 +121,12 @@ add_library(common STATIC
     threadsafe_queue.h
     timer.cpp
     timer.h
+    uint128.cpp
+    uint128.h
     vector_math.h
     web_result.h
+    zstd_compression.cpp
+    zstd_compression.h
 )
 
 if(ARCHITECTURE_x86_64)
@@ -129,3 +140,4 @@ endif()
 create_target_directory_groups(common)
 
 target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
+target_link_libraries(common PRIVATE lz4_static libzstd_static)
diff --git a/src/common/assert.h b/src/common/assert.h
index 6002f7ab1..4b0e3f64e 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -57,3 +57,21 @@ __declspec(noinline, noreturn)
 
 #define UNIMPLEMENTED_IF(cond) ASSERT_MSG(!(cond), "Unimplemented code!")
 #define UNIMPLEMENTED_IF_MSG(cond, ...) ASSERT_MSG(!(cond), __VA_ARGS__)
+
+// If the assert is ignored, execute _b_. Note that _a_ is expanded twice (avoid side effects).
+#define ASSERT_OR_EXECUTE(_a_, _b_)                                                                \
+    do {                                                                                           \
+        ASSERT(_a_);                                                                               \
+        if (!(_a_)) {                                                                              \
+            _b_                                                                                    \
+        }                                                                                          \
+    } while (0)
+
+// If the assert is ignored, execute _b_. Note that _a_ is expanded twice (avoid side effects).
+#define ASSERT_OR_EXECUTE_MSG(_a_, _b_, ...)                                                       \
+    do {                                                                                           \
+        ASSERT_MSG(_a_, __VA_ARGS__);                                                              \
+        if (!(_a_)) {                                                                              \
+            _b_                                                                                    \
+        }                                                                                          \
+    } while (0)
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 7433c39ba..902e668e3 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -34,6 +34,7 @@
 #include <limits>
 #include <type_traits>
 #include "common/common_funcs.h"
+#include "common/swap.h"
 
 /*
  * Abstract bitfield class
@@ -108,7 +109,7 @@
  * symptoms.
  */
 #pragma pack(1)
-template <std::size_t Position, std::size_t Bits, typename T>
+template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
 struct BitField {
 private:
     // UnderlyingType is T for non-enum types and the underlying type of T if
@@ -121,6 +122,8 @@ private:
     // We store the value as the unsigned type to avoid undefined behaviour on value shifting
     using StorageType = std::make_unsigned_t<UnderlyingType>;
 
+    using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
+
 public:
     /// Constants to allow limited introspection of fields if needed
     static constexpr std::size_t position = Position;
@@ -170,7 +173,7 @@ public:
     }
 
     constexpr FORCE_INLINE void Assign(const T& value) {
-        storage = (storage & ~mask) | FormatValue(value);
+        storage = (static_cast<StorageType>(storage) & ~mask) | FormatValue(value);
     }
 
     constexpr T Value() const {
@@ -182,7 +185,7 @@ public:
     }
 
 private:
-    StorageType storage;
+    StorageTypeWithEndian storage;
 
     static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range");
 
@@ -193,3 +196,6 @@ private:
     static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
 };
 #pragma pack()
+
+template <std::size_t Position, std::size_t Bits, typename T>
+using BitFieldBE = BitField<Position, Bits, T, BETag>;
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index 1eea17ba1..d032df413 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -32,7 +32,7 @@ inline u32 CountLeadingZeroes32(u32 value) {
     return 32;
 }
 
-inline u64 CountLeadingZeroes64(u64 value) {
+inline u32 CountLeadingZeroes64(u64 value) {
     unsigned long leading_zero = 0;
 
     if (_BitScanReverse64(&leading_zero, value) != 0) {
@@ -47,15 +47,54 @@ inline u32 CountLeadingZeroes32(u32 value) {
         return 32;
     }
 
-    return __builtin_clz(value);
+    return static_cast<u32>(__builtin_clz(value));
 }
 
-inline u64 CountLeadingZeroes64(u64 value) {
+inline u32 CountLeadingZeroes64(u64 value) {
     if (value == 0) {
         return 64;
     }
 
-    return __builtin_clzll(value);
+    return static_cast<u32>(__builtin_clzll(value));
 }
 #endif
+
+#ifdef _MSC_VER
+inline u32 CountTrailingZeroes32(u32 value) {
+    unsigned long trailing_zero = 0;
+
+    if (_BitScanForward(&trailing_zero, value) != 0) { // nonzero result: a set bit was found
+        return trailing_zero;
+    }
+
+    return 32; // no bit set: report the full width
+}
+
+inline u32 CountTrailingZeroes64(u64 value) {
+    unsigned long trailing_zero = 0;
+
+    if (_BitScanForward64(&trailing_zero, value) != 0) { // nonzero result: a set bit was found
+        return trailing_zero;
+    }
+
+    return 64; // no bit set: report the full width
+}
+#else
+inline u32 CountTrailingZeroes32(u32 value) {
+    if (value == 0) { // handled separately; __builtin_ctz is never called with zero
+        return 32;
+    }
+
+    return static_cast<u32>(__builtin_ctz(value));
+}
+
+inline u32 CountTrailingZeroes64(u64 value) {
+    if (value == 0) { // handled separately; __builtin_ctzll is never called with zero
+        return 64;
+    }
+
+    return static_cast<u32>(__builtin_ctzll(value));
+}
+#endif
+
 } // namespace Common
diff --git a/src/common/common_types.h b/src/common/common_types.h
index 6b1766dca..4cec89fbd 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -40,10 +40,9 @@ using s64 = std::int64_t; ///< 64-bit signed int
 using f32 = float;  ///< 32-bit floating point
 using f64 = double; ///< 64-bit floating point
 
-// TODO: It would be nice to eventually replace these with strong types that prevent accidental
-// conversion between each other.
-using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
-using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
+using VAddr = u64;    ///< Represents a pointer in the userspace virtual address space.
+using PAddr = u64;    ///< Represents a pointer in the ARM11 physical address space.
+using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.
 
 using u128 = std::array<std::uint64_t, 2>;
 static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
diff --git a/src/common/detached_tasks.cpp b/src/common/detached_tasks.cpp
index a347d9e02..f268d6021 100644
--- a/src/common/detached_tasks.cpp
+++ b/src/common/detached_tasks.cpp
@@ -16,22 +16,22 @@ DetachedTasks::DetachedTasks() {
 }
 
 void DetachedTasks::WaitForAllTasks() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     cv.wait(lock, [this]() { return count == 0; });
 }
 
 DetachedTasks::~DetachedTasks() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     ASSERT(count == 0);
     instance = nullptr;
 }
 
 void DetachedTasks::AddTask(std::function<void()> task) {
-    std::unique_lock<std::mutex> lock(instance->mutex);
+    std::unique_lock lock{instance->mutex};
     ++instance->count;
     std::thread([task{std::move(task)}]() {
         task();
-        std::unique_lock<std::mutex> lock(instance->mutex);
+        std::unique_lock lock{instance->mutex};
         --instance->count;
         std::notify_all_at_thread_exit(instance->cv, std::move(lock));
     })
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 4462ff3fb..a03179520 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -46,12 +46,12 @@ public:
     }
 
     void AddBackend(std::unique_ptr<Backend> backend) {
-        std::lock_guard<std::mutex> lock(writing_mutex);
+        std::lock_guard lock{writing_mutex};
         backends.push_back(std::move(backend));
     }
 
     void RemoveBackend(std::string_view backend_name) {
-        std::lock_guard<std::mutex> lock(writing_mutex);
+        std::lock_guard lock{writing_mutex};
         const auto it =
             std::remove_if(backends.begin(), backends.end(),
                            [&backend_name](const auto& i) { return backend_name == i->GetName(); });
@@ -80,7 +80,7 @@ private:
         backend_thread = std::thread([&] {
             Entry entry;
             auto write_logs = [&](Entry& e) {
-                std::lock_guard<std::mutex> lock(writing_mutex);
+                std::lock_guard lock{writing_mutex};
                 for (const auto& backend : backends) {
                     backend->Write(e);
                 }
diff --git a/src/common/lz4_compression.cpp b/src/common/lz4_compression.cpp
new file mode 100644
index 000000000..ade6759bb
--- /dev/null
+++ b/src/common/lz4_compression.cpp
@@ -0,0 +1,76 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <lz4hc.h>
+
+#include "common/assert.h"
+#include "common/lz4_compression.h"
+
+namespace Common::Compression {
+
+std::vector<u8> CompressDataLZ4(const u8* source, std::size_t source_size) {
+    ASSERT_MSG(source_size <= LZ4_MAX_INPUT_SIZE, "Source size exceeds LZ4 maximum input size");
+    // The LZ4 calls below take int sizes, so the size is narrowed after the range assert above.
+    const auto source_size_int = static_cast<int>(source_size);
+    const int max_compressed_size = LZ4_compressBound(source_size_int);
+    std::vector<u8> compressed(max_compressed_size);
+    // Compress into the bound-sized buffer; it is trimmed to the actual size further down.
+    const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
+                                                     reinterpret_cast<char*>(compressed.data()),
+                                                     source_size_int, max_compressed_size);
+
+    if (compressed_size <= 0) {
+        // Compression failed
+        return {};
+    }
+
+    compressed.resize(compressed_size);
+
+    return compressed;
+}
+
+std::vector<u8> CompressDataLZ4HC(const u8* source, std::size_t source_size,
+                                  s32 compression_level) {
+    ASSERT_MSG(source_size <= LZ4_MAX_INPUT_SIZE, "Source size exceeds LZ4 maximum input size");
+    // Out-of-range levels are clamped into [LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX], not rejected.
+    compression_level = std::clamp(compression_level, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX);
+
+    const auto source_size_int = static_cast<int>(source_size);
+    const int max_compressed_size = LZ4_compressBound(source_size_int);
+    std::vector<u8> compressed(max_compressed_size);
+    // Compress into the bound-sized buffer; it is trimmed to the actual size further down.
+    const int compressed_size = LZ4_compress_HC(
+        reinterpret_cast<const char*>(source), reinterpret_cast<char*>(compressed.data()),
+        source_size_int, max_compressed_size, compression_level);
+
+    if (compressed_size <= 0) {
+        // Compression failed
+        return {};
+    }
+
+    compressed.resize(compressed_size);
+
+    return compressed;
+}
+
+std::vector<u8> CompressDataLZ4HCMax(const u8* source, std::size_t source_size) {
+    return CompressDataLZ4HC(source, source_size, LZ4HC_CLEVEL_MAX); // maximum LZ4HC level
+}
+
+std::vector<u8> DecompressDataLZ4(const std::vector<u8>& compressed,
+                                  std::size_t uncompressed_size) {
+    std::vector<u8> uncompressed(uncompressed_size);
+    const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
+                                               reinterpret_cast<char*>(uncompressed.data()),
+                                               static_cast<int>(compressed.size()),
+                                               static_cast<int>(uncompressed.size()));
+    if (static_cast<int>(uncompressed_size) != size_check) {
+        // Decompression failed, or the output size differs from uncompressed_size
+        return {};
+    }
+    return uncompressed;
+}
+
+} // namespace Common::Compression
diff --git a/src/common/lz4_compression.h b/src/common/lz4_compression.h
new file mode 100644
index 000000000..fe2231a6c
--- /dev/null
+++ b/src/common/lz4_compression.h
@@ -0,0 +1,55 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+#include <vector>
+#include "common/common_types.h"
+
+namespace Common::Compression {
+
+/**
+ * Compresses a source memory region with LZ4 and returns the compressed data in a vector.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ *
+ * @return the compressed data.
+ */
+std::vector<u8> CompressDataLZ4(const u8* source, std::size_t source_size);
+
+/**
+ * Utilizes the LZ4 subalgorithm LZ4HC with the specified compression level. Higher compression
+ * levels result in a smaller compressed size, but require more CPU time for compression. The
+ * compression level has almost no impact on decompression speed. Data compressed with LZ4HC can
+ * also be decompressed with the default LZ4 decompression.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ * @param compression_level the compression level to use (3 to 12); other values are clamped.
+ *
+ * @return the compressed data.
+ */
+std::vector<u8> CompressDataLZ4HC(const u8* source, std::size_t source_size, s32 compression_level);
+
+/**
+ * Utilizes the LZ4 subalgorithm LZ4HC with the highest possible compression level.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ *
+ * @return the compressed data.
+ */
+std::vector<u8> CompressDataLZ4HCMax(const u8* source, std::size_t source_size);
+
+/**
+ * Decompresses a source memory region with LZ4 and returns the uncompressed data in a vector.
+ *
+ * @param compressed the compressed source memory region.
+ * @param uncompressed_size the size in bytes of the uncompressed data.
+ *
+ * @return the decompressed data.
+ */
+std::vector<u8> DecompressDataLZ4(const std::vector<u8>& compressed, std::size_t uncompressed_size);
+
+} // namespace Common::Compression
+\ No newline at end of file
diff --git a/src/core/memory_hook.cpp b/src/common/memory_hook.cpp
index c61c6c1fb..3986986d6 100644
--- a/src/core/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/memory_hook.h"
+#include "common/memory_hook.h"
 
-namespace Memory {
+namespace Common {
 
 MemoryHook::~MemoryHook() = default;
 
-} // namespace Memory
+} // namespace Common
diff --git a/src/core/memory_hook.h b/src/common/memory_hook.h
index 940777107..adaa4c2c5 100644
--- a/src/core/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@
 
 #include "common/common_types.h"
 
-namespace Memory {
+namespace Common {
 
 /**
  * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
 };
 
 using MemoryHookPointer = std::shared_ptr<MemoryHook>;
-} // namespace Memory
+} // namespace Common
diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h
new file mode 100644
index 000000000..9cb448f56
--- /dev/null
+++ b/src/common/multi_level_queue.h
@@ -0,0 +1,337 @@
+// Copyright 2019 TuxSH
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <iterator>
+#include <list>
+#include <utility>
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+
+namespace Common {
+
+/**
+ * A MultiLevelQueue is a type of priority queue which has the following characteristics:
+ * - iterable through each of its elements.
+ * - back can be obtained.
+ * - O(1) add, lookup (both front and back)
+ * - discrete priorities and a max of 64 priorities (limited domain)
+ * This type of priority queue is normally used for managing threads within a scheduler
+ */
+template <typename T, std::size_t Depth>
+class MultiLevelQueue {
+public:
+    using value_type = T;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+
+    using difference_type = typename std::pointer_traits<pointer>::difference_type;
+    using size_type = std::size_t;
+
+    template <bool is_constant>
+    class iterator_impl {
+    public:
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = T;
+        using pointer = std::conditional_t<is_constant, const T*, T*>; // const only if constant
+        using reference = std::conditional_t<is_constant, const T&, T&>;
+        using difference_type = typename std::pointer_traits<pointer>::difference_type;
+
+        friend bool operator==(const iterator_impl& lhs, const iterator_impl& rhs) {
+            if (lhs.IsEnd() && rhs.IsEnd())
+                return true;
+            return std::tie(lhs.current_priority, lhs.it) == std::tie(rhs.current_priority, rhs.it);
+        }
+
+        friend bool operator!=(const iterator_impl& lhs, const iterator_impl& rhs) {
+            return !operator==(lhs, rhs);
+        }
+
+        reference operator*() const {
+            return *it;
+        }
+
+        pointer operator->() const {
+            return it.operator->();
+        }
+
+        iterator_impl& operator++() {
+            if (IsEnd()) {
+                return *this;
+            }
+
+            ++it;
+            // Past the last element of this level: continue at the next used level, if any.
+            if (it == GetEndItForPrio()) {
+                u64 prios = mlq.used_priorities;
+                prios &= ~((2ULL << current_priority) - 1); // 2ULL: never shifts by 64
+                if (prios == 0) {
+                    current_priority = static_cast<u32>(mlq.depth()); // becomes end()
+                } else {
+                    current_priority = CountTrailingZeroes64(prios);
+                    it = GetBeginItForPrio();
+                }
+            }
+            return *this;
+        }
+
+        iterator_impl& operator--() {
+            if (IsEnd()) { // from end(): last element of the last used level
+                if (mlq.used_priorities != 0) {
+                    current_priority = 63 - CountLeadingZeroes64(mlq.used_priorities);
+                    it = GetEndItForPrio();
+                    --it;
+                }
+            } else if (it == GetBeginItForPrio()) { // front of this level reached
+                u64 prios = mlq.used_priorities;
+                prios &= (1ULL << current_priority) - 1; // used levels preceding this one
+                if (prios != 0) {
+                    current_priority = 63 - CountLeadingZeroes64(prios); // closest of them
+                    it = GetEndItForPrio();
+                    --it;
+                }
+            } else {
+                --it;
+            }
+            return *this;
+        }
+
+        iterator_impl operator++(int) {
+            const iterator_impl v{*this};
+            ++(*this);
+            return v;
+        }
+
+        iterator_impl operator--(int) {
+            const iterator_impl v{*this};
+            --(*this);
+            return v;
+        }
+
+        // allow implicit non-const->const
+        iterator_impl(const iterator_impl<false>& other)
+            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
+
+        iterator_impl(const iterator_impl<true>& other)
+            : mlq(other.mlq), it(other.it), current_priority(other.current_priority) {}
+
+        iterator_impl& operator=(const iterator_impl<false>& other) {
+            mlq = other.mlq;
+            it = other.it;
+            current_priority = other.current_priority;
+            return *this;
+        }
+
+        friend class iterator_impl<true>;
+        iterator_impl() = default;
+
+    private:
+        friend class MultiLevelQueue;
+        using container_ref =
+            std::conditional_t<is_constant, const MultiLevelQueue&, MultiLevelQueue&>;
+        using list_iterator = std::conditional_t<is_constant, typename std::list<T>::const_iterator,
+                                                 typename std::list<T>::iterator>;
+
+        explicit iterator_impl(container_ref mlq, list_iterator it, u32 current_priority)
+            : mlq(mlq), it(it), current_priority(current_priority) {}
+        explicit iterator_impl(container_ref mlq, u32 current_priority)
+            : mlq(mlq), it(), current_priority(current_priority) {}
+
+        bool IsEnd() const {
+            return current_priority == mlq.depth();
+        }
+
+        list_iterator GetBeginItForPrio() const {
+            return mlq.levels[current_priority].begin();
+        }
+
+        list_iterator GetEndItForPrio() const {
+            return mlq.levels[current_priority].end();
+        }
+
+        container_ref mlq;
+        list_iterator it;
+        u32 current_priority;
+    };
+
+    using iterator = iterator_impl<false>;
+    using const_iterator = iterator_impl<true>;
+
+    void add(const T& element, u32 priority, bool send_back = true) {
+        if (send_back)
+            levels[priority].push_back(element);
+        else
+            levels[priority].push_front(element);
+        used_priorities |= 1ULL << priority;
+    }
+
+    void remove(const T& element, u32 priority) {
+        auto it = ListIterateTo(levels[priority], element);
+        if (it == levels[priority].end())
+            return;
+        levels[priority].erase(it);
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    void adjust(const T& element, u32 old_priority, u32 new_priority, bool adjust_front = false) {
+        remove(element, old_priority);
+        add(element, new_priority, !adjust_front);
+    }
+    void adjust(const_iterator it, u32 old_priority, u32 new_priority, bool adjust_front = false) {
+        adjust(*it, old_priority, new_priority, adjust_front);
+    }
+
+    void transfer_to_front(const T& element, u32 priority, MultiLevelQueue& other) {
+        const auto it = ListIterateTo(levels[priority], element);
+        if (it == levels[priority].cend())
+            return; // not queued at this priority; splicing end() would be undefined
+        ListSplice(other.levels[priority], other.levels[priority].begin(), levels[priority], it);
+        other.used_priorities |= 1ULL << priority; // destination level is now occupied
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    void transfer_to_front(const_iterator it, u32 priority, MultiLevelQueue& other) {
+        transfer_to_front(*it, priority, other);
+    }
+
+    void transfer_to_back(const T& element, u32 priority, MultiLevelQueue& other) {
+        const auto it = ListIterateTo(levels[priority], element);
+        if (it == levels[priority].cend())
+            return; // not queued at this priority; splicing end() would be undefined
+        ListSplice(other.levels[priority], other.levels[priority].end(), levels[priority], it);
+        other.used_priorities |= 1ULL << priority; // destination level is now occupied
+        if (levels[priority].empty()) {
+            used_priorities &= ~(1ULL << priority);
+        }
+    }
+
+    void transfer_to_back(const_iterator it, u32 priority, MultiLevelQueue& other) {
+        transfer_to_back(*it, priority, other);
+    }
+
+    void yield(u32 priority, std::size_t n = 1) {
+        ListShiftForward(levels[priority], n);
+    }
+
+    std::size_t depth() const {
+        return Depth;
+    }
+
+    std::size_t size(u32 priority) const {
+        return levels[priority].size();
+    }
+
+    std::size_t size() const {
+        u64 priorities = used_priorities;
+        std::size_t size = 0;
+        while (priorities != 0) {
+            const u64 current_priority = CountTrailingZeroes64(priorities);
+            size += levels[current_priority].size();
+            priorities &= ~(1ULL << current_priority);
+        }
+        return size;
+    }
+
+    bool empty() const {
+        return used_priorities == 0;
+    }
+
+    bool empty(u32 priority) const {
+        return (used_priorities & (1ULL << priority)) == 0;
+    }
+
+    u32 highest_priority_set(u32 max_priority = 0) const {
+        const u64 priorities =
+            max_priority == 0 ? used_priorities : (used_priorities & ~((1ULL << max_priority) - 1));
+        return priorities == 0 ? Depth : static_cast<u32>(CountTrailingZeroes64(priorities));
+    }
+
+    u32 lowest_priority_set(u32 min_priority = Depth - 1) const {
+        const u64 priorities = min_priority >= Depth - 1
+                                   ? used_priorities
+                                   : (used_priorities & ((1ULL << (min_priority + 1)) - 1));
+        return priorities == 0 ? Depth : 63 - CountLeadingZeroes64(priorities);
+    }
+
+    const_iterator cbegin(u32 max_prio = 0) const {
+        const u32 priority = highest_priority_set(max_prio);
+        return priority == Depth ? cend()
+                                 : const_iterator{*this, levels[priority].cbegin(), priority};
+    }
+    const_iterator begin(u32 max_prio = 0) const {
+        return cbegin(max_prio);
+    }
+    iterator begin(u32 max_prio = 0) {
+        const u32 priority = highest_priority_set(max_prio);
+        return priority == Depth ? end() : iterator{*this, levels[priority].begin(), priority};
+    }
+
+    const_iterator cend(u32 min_prio = Depth - 1) const {
+        return min_prio == Depth - 1 ? const_iterator{*this, Depth} : cbegin(min_prio + 1);
+    }
+    const_iterator end(u32 min_prio = Depth - 1) const {
+        return cend(min_prio);
+    }
+    iterator end(u32 min_prio = Depth - 1) {
+        return min_prio == Depth - 1 ? iterator{*this, Depth} : begin(min_prio + 1);
+    }
+
+    T& front(u32 max_priority = 0) {
+        const u32 priority = highest_priority_set(max_priority);
+        return levels[priority == Depth ? 0 : priority].front();
+    }
+    const T& front(u32 max_priority = 0) const {
+        const u32 priority = highest_priority_set(max_priority);
+        return levels[priority == Depth ? 0 : priority].front();
+    }
+
+    T back(u32 min_priority = Depth - 1) {
+        const u32 priority = lowest_priority_set(min_priority); // intended
+        return levels[priority == Depth ? Depth - 1 : priority].back(); // none set: last level
+    }
+    const T& back(u32 min_priority = Depth - 1) const {
+        const u32 priority = lowest_priority_set(min_priority); // intended
+        return levels[priority == Depth ? Depth - 1 : priority].back(); // none set: last level
+    }
+
+private:
+    using const_list_iterator = typename std::list<T>::const_iterator;
+
+    static void ListShiftForward(std::list<T>& list, const std::size_t shift = 1) {
+        if (shift >= list.size()) {
+            return;
+        }
+
+        const auto begin_range = list.begin();
+        const auto end_range = std::next(begin_range, shift);
+        list.splice(list.end(), list, begin_range, end_range);
+    }
+
+    static void ListSplice(std::list<T>& in_list, const_list_iterator position,
+                           std::list<T>& out_list, const_list_iterator element) {
+        in_list.splice(position, out_list, element);
+    }
+
+    static const_list_iterator ListIterateTo(const std::list<T>& list, const T& element) {
+        auto it = list.cbegin();
+        while (it != list.cend() && *it != element) {
+            ++it;
+        }
+        return it;
+    }
+
+    std::array<std::list<T>, Depth> levels;
+    u64 used_priorities = 0;
+};
+
+} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
new file mode 100644
index 000000000..69b7abc54
--- /dev/null
+++ b/src/common/page_table.cpp
@@ -0,0 +1,31 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/page_table.h"
+
+namespace Common {
+
+PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
+
+PageTable::~PageTable() = default;
+
+void PageTable::Resize(std::size_t address_space_width_in_bits) {
+    // One table entry per page: 2^(address_space_width_in_bits - page_size_in_bits) in total.
+    const std::size_t index_bits = address_space_width_in_bits - page_size_in_bits;
+    const std::size_t entry_count = 1ULL << index_bits;
+
+    pointers.resize(entry_count);
+    attributes.resize(entry_count);
+    backing_addr.resize(entry_count);
+
+    // resize() alone may keep a larger, older allocation alive when the table shrinks. The
+    // default 39-bit address space causes an initial 1GB allocation, so up to ~800 MB would be
+    // wasted for 36-bit titles. shrink_to_fit reduces capacity to what is actually in use.
+
+    pointers.shrink_to_fit();
+    attributes.shrink_to_fit();
+    backing_addr.shrink_to_fit();
+}
+
+} // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
new file mode 100644
index 000000000..8b8ff0bb8
--- /dev/null
+++ b/src/common/page_table.h
@@ -0,0 +1,84 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include <boost/icl/interval_map.hpp>
+#include "common/common_types.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+
+enum class PageType : u8 {
+    /// Page is unmapped and should cause an access error.
+    Unmapped,
+    /// Page is mapped to regular memory. This is the only type you can get pointers to.
+    Memory,
+    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
+    /// invalidation
+    RasterizerCachedMemory,
+    /// Page is mapped to an I/O region. Reads and writes to this page are handled by functions.
+    Special,
+    /// Page is allocated for use.
+    Allocated,
+};
+
+struct SpecialRegion { // NOTE(review): uses std::tie, but <tuple> is not included directly here
+    enum class Type {
+        DebugHook,
+        IODevice,
+    } type;
+
+    MemoryHookPointer handler;
+
+    bool operator<(const SpecialRegion& other) const { // orders by type, then by handler
+        return std::tie(type, handler) < std::tie(other.type, other.handler);
+    }
+
+    bool operator==(const SpecialRegion& other) const { // equal when type and handler both match
+        return std::tie(type, handler) == std::tie(other.type, other.handler);
+    }
+};
+
+/**
+ * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
+ * mimics the way a real CPU page table works.
+ */
+struct PageTable {
+    explicit PageTable(std::size_t page_size_in_bits);
+    ~PageTable();
+
+    /**
+     * Resizes the page table to be able to accommodate enough pages within
+     * a given address space.
+     *
+     * @param address_space_width_in_bits The address size width in bits.
+     */
+    void Resize(std::size_t address_space_width_in_bits);
+
+    /**
+     * Vector of memory pointers backing each page. An entry can only be non-null if the
+     * corresponding entry in the `attributes` vector is of type `Memory`.
+     */
+    std::vector<u8*> pointers;
+
+    /**
+     * Contains MMIO handlers that back memory regions whose entries in the `attributes` vector are
+     * of type `Special`. NOTE(review): std::set is used here without including <set> directly.
+     */
+    boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions;
+
+    /**
+     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
+     * the corresponding entry in `pointers` MUST be set to null.
+     */
+    std::vector<PageType> attributes;
+
+    std::vector<u64> backing_addr; // resized by Resize() to the same entry count as `pointers`
+
+    const std::size_t page_size_in_bits{}; // set by the constructor; Resize() subtracts it
+};
+
+} // namespace Common
diff --git a/src/common/swap.h b/src/common/swap.h
index 0e219747f..b3eab1324 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -17,6 +17,8 @@
 
 #pragma once
 
+#include <type_traits>
+
 #if defined(_MSC_VER)
 #include <cstdlib>
 #elif defined(__linux__)
@@ -170,7 +172,7 @@ struct swap_struct_t {
     using swapped_t = swap_struct_t;
 
 protected:
-    T value = T();
+    T value;
 
     static T swap(T v) {
         return F::swap(v);
@@ -605,52 +607,154 @@ struct swap_double_t {
     }
 };
 
-#if COMMON_LITTLE_ENDIAN
-using u16_le = u16;
-using u32_le = u32;
-using u64_le = u64;
+template <typename T>
+struct swap_enum_t { // holds an enum T in byte-swapped form and swaps it back on conversion
+    static_assert(std::is_enum_v<T>);
+    using base = std::underlying_type_t<T>;
+
+public:
+    swap_enum_t() = default;
+    swap_enum_t(const T& v) : value(swap(v)) {} // stores the swapped representation of v
+
+    swap_enum_t& operator=(const T& v) {
+        value = swap(v);
+        return *this;
+    }
+
+    operator T() const { // swaps back, yielding the enum value that was assigned
+        return swap(value);
+    }
+
+    explicit operator base() const { // same as operator T, converted to the underlying type
+        return static_cast<base>(swap(value));
+    }
 
-using s16_le = s16;
-using s32_le = s32;
-using s64_le = s64;
+protected:
+    T value{}; // kept in swapped byte order
+    // clang-format off
+    using swap_t = std::conditional_t<
+        std::is_same_v<base, u16>, swap_16_t<u16>, std::conditional_t<
+        std::is_same_v<base, s16>, swap_16_t<s16>, std::conditional_t<
+        std::is_same_v<base, u32>, swap_32_t<u32>, std::conditional_t<
+        std::is_same_v<base, s32>, swap_32_t<s32>, std::conditional_t<
+        std::is_same_v<base, u64>, swap_64_t<u64>, std::conditional_t<
+        std::is_same_v<base, s64>, swap_64_t<s64>, void>>>>>>;
+    // clang-format on
+    static T swap(T x) { // NOTE(review): swap_t is void for any other base (e.g. 8-bit enums)
+        return static_cast<T>(swap_t::swap(static_cast<base>(x)));
+    }
+};
 
-using float_le = float;
-using double_le = double;
+struct SwapTag {}; // Use the different endianness from the system
+struct KeepTag {}; // Use the same endianness as the system
 
-using u64_be = swap_struct_t<u64, swap_64_t<u64>>;
-using s64_be = swap_struct_t<s64, swap_64_t<s64>>;
+template <typename T, typename Tag>
+struct AddEndian;
 
-using u32_be = swap_struct_t<u32, swap_32_t<u32>>;
-using s32_be = swap_struct_t<s32, swap_32_t<s32>>;
+// KeepTag specializations
 
-using u16_be = swap_struct_t<u16, swap_16_t<u16>>;
-using s16_be = swap_struct_t<s16, swap_16_t<s16>>;
+template <typename T>
+struct AddEndian<T, KeepTag> {
+    using type = T;
+};
 
-using float_be = swap_struct_t<float, swap_float_t<float>>;
-using double_be = swap_struct_t<double, swap_double_t<double>>;
-#else
+// SwapTag specializations
+
+template <>
+struct AddEndian<u8, SwapTag> {
+    using type = u8;
+};
+
+template <>
+struct AddEndian<u16, SwapTag> {
+    using type = swap_struct_t<u16, swap_16_t<u16>>;
+};
+
+template <>
+struct AddEndian<u32, SwapTag> {
+    using type = swap_struct_t<u32, swap_32_t<u32>>;
+};
 
-using u64_le = swap_struct_t<u64, swap_64_t<u64>>;
-using s64_le = swap_struct_t<s64, swap_64_t<s64>>;
+template <>
+struct AddEndian<u64, SwapTag> {
+    using type = swap_struct_t<u64, swap_64_t<u64>>;
+};
+
+template <>
+struct AddEndian<s8, SwapTag> {
+    using type = s8;
+};
 
-using u32_le = swap_struct_t<u32, swap_32_t<u32>>;
-using s32_le = swap_struct_t<s32, swap_32_t<s32>>;
+template <>
+struct AddEndian<s16, SwapTag> {
+    using type = swap_struct_t<s16, swap_16_t<s16>>;
+};
 
-using u16_le = swap_struct_t<u16, swap_16_t<u16>>;
-using s16_le = swap_struct_t<s16, swap_16_t<s16>>;
+template <>
+struct AddEndian<s32, SwapTag> {
+    using type = swap_struct_t<s32, swap_32_t<s32>>;
+};
+
+template <>
+struct AddEndian<s64, SwapTag> {
+    using type = swap_struct_t<s64, swap_64_t<s64>>;
+};
+
+template <>
+struct AddEndian<float, SwapTag> {
+    using type = swap_struct_t<float, swap_float_t<float>>;
+};
+
+template <>
+struct AddEndian<double, SwapTag> {
+    using type = swap_struct_t<double, swap_double_t<double>>;
+};
+
+template <typename T>
+struct AddEndian<T, SwapTag> {
+    static_assert(std::is_enum_v<T>);
+    using type = swap_enum_t<T>;
+};
 
-using float_le = swap_struct_t<float, swap_float_t<float>>;
-using double_le = swap_struct_t<double, swap_double_t<double>>;
+// Alias LETag/BETag as KeepTag/SwapTag depending on the system
+#if COMMON_LITTLE_ENDIAN
 
-using u16_be = u16;
-using u32_be = u32;
-using u64_be = u64;
+using LETag = KeepTag;
+using BETag = SwapTag;
 
-using s16_be = s16;
-using s32_be = s32;
-using s64_be = s64;
+#else
 
-using float_be = float;
-using double_be = double;
+using BETag = KeepTag;
+using LETag = SwapTag;
 
 #endif
+
+// Aliases for LE types
+using u16_le = AddEndian<u16, LETag>::type;
+using u32_le = AddEndian<u32, LETag>::type;
+using u64_le = AddEndian<u64, LETag>::type;
+
+using s16_le = AddEndian<s16, LETag>::type;
+using s32_le = AddEndian<s32, LETag>::type;
+using s64_le = AddEndian<s64, LETag>::type;
+
+template <typename T>
+using enum_le = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, LETag>::type>;
+
+using float_le = AddEndian<float, LETag>::type;
+using double_le = AddEndian<double, LETag>::type;
+
+// Aliases for BE types
+using u16_be = AddEndian<u16, BETag>::type;
+using u32_be = AddEndian<u32, BETag>::type;
+using u64_be = AddEndian<u64, BETag>::type;
+
+using s16_be = AddEndian<s16, BETag>::type;
+using s32_be = AddEndian<s32, BETag>::type;
+using s64_be = AddEndian<s64, BETag>::type;
+
+template <typename T>
+using enum_be = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, BETag>::type>;
+
+using float_be = AddEndian<float, BETag>::type;
+using double_be = AddEndian<double, BETag>::type;
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 5144c0d9f..fe7a420cc 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -27,18 +27,6 @@ namespace Common {
 
 #ifdef _MSC_VER
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
-    SetThreadAffinityMask(thread, mask);
-}
-
-void SetCurrentThreadAffinity(u32 mask) {
-    SetThreadAffinityMask(GetCurrentThread(), mask);
-}
-
-void SwitchCurrentThread() {
-    SwitchToThread();
-}
-
 // Sets the debugger-visible name of the current thread.
 // Uses undocumented (actually, it is now documented) trick.
 // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
@@ -70,31 +58,6 @@ void SetCurrentThreadName(const char* name) {
 
 #else // !MSVC_VER, so must be POSIX threads
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
-#ifdef __APPLE__
-    thread_policy_set(pthread_mach_thread_np(thread), THREAD_AFFINITY_POLICY, (integer_t*)&mask, 1);
-#elif (defined __linux__ || defined __FreeBSD__) && !(defined ANDROID)
-    cpu_set_t cpu_set;
-    CPU_ZERO(&cpu_set);
-
-    for (int i = 0; i != sizeof(mask) * 8; ++i)
-        if ((mask >> i) & 1)
-            CPU_SET(i, &cpu_set);
-
-    pthread_setaffinity_np(thread, sizeof(cpu_set), &cpu_set);
-#endif
-}
-
-void SetCurrentThreadAffinity(u32 mask) {
-    SetThreadAffinity(pthread_self(), mask);
-}
-
-#ifndef _WIN32
-void SwitchCurrentThread() {
-    usleep(1000 * 1);
-}
-#endif
-
 // MinGW with the POSIX threading model does not support pthread_setname_np
 #if !defined(_WIN32) || defined(_MSC_VER)
 void SetCurrentThreadName(const char* name) {
diff --git a/src/common/thread.h b/src/common/thread.h
index 2cf74452d..0cfd98be6 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,14 +9,13 @@
 #include <cstddef>
 #include <mutex>
 #include <thread>
-#include "common/common_types.h"
 
 namespace Common {
 
 class Event {
 public:
     void Set() {
-        std::lock_guard<std::mutex> lk(mutex);
+        std::lock_guard lk{mutex};
         if (!is_set) {
             is_set = true;
             condvar.notify_one();
@@ -24,14 +23,14 @@ public:
     }
 
     void Wait() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         condvar.wait(lk, [&] { return is_set; });
         is_set = false;
     }
 
     template <class Clock, class Duration>
     bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         if (!condvar.wait_until(lk, time, [this] { return is_set; }))
             return false;
         is_set = false;
@@ -39,7 +38,7 @@ public:
     }
 
     void Reset() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         // no other action required, since wait loops on the predicate and any lingering signal will
         // get cleared on the first iteration
         is_set = false;
@@ -57,7 +56,7 @@ public:
 
     /// Blocks until all "count" threads have called Sync()
     void Sync() {
-        std::unique_lock<std::mutex> lk(mutex);
+        std::unique_lock lk{mutex};
         const std::size_t current_generation = generation;
 
         if (++waiting == count) {
@@ -78,9 +77,6 @@ private:
     std::size_t generation = 0; // Incremented once each time the barrier is used
 };
 
-void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask);
-void SetCurrentThreadAffinity(u32 mask);
-void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
 void SetCurrentThreadName(const char* name);
 
 } // namespace Common
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index e7594db68..791f99a8c 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -6,7 +6,6 @@
 
 #include <array>
 #include <deque>
-#include <boost/range/algorithm_ext/erase.hpp>
 
 namespace Common {
 
@@ -111,8 +110,9 @@ struct ThreadQueueList {
     }
 
     void remove(Priority priority, const T& thread_id) {
-        Queue* cur = &queues[priority];
-        boost::remove_erase(cur->data, thread_id);
+        Queue* const cur = &queues[priority];
+        const auto iter = std::remove(cur->data.begin(), cur->data.end(), thread_id);
+        cur->data.erase(iter, cur->data.end());
     }
 
     void rotate(Priority priority) {
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index 821e8536a..e714ba5b3 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -78,7 +78,7 @@ public:
 
     T PopWait() {
         if (Empty()) {
-            std::unique_lock<std::mutex> lock(cv_mutex);
+            std::unique_lock lock{cv_mutex};
             cv.wait(lock, [this]() { return !Empty(); });
         }
         T t;
@@ -137,7 +137,7 @@ public:
 
     template <typename Arg>
     void Push(Arg&& t) {
-        std::lock_guard<std::mutex> lock(write_lock);
+        std::lock_guard lock{write_lock};
         spsc_queue.Push(t);
     }
 
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..32bf56730
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,45 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+#include <cstring>
+#include "common/uint128.h"
+
+namespace Common {
+
+u128 Multiply64Into128(u64 a, u64 b) { // full 128-bit product of two 64-bit values
+    u128 result;
+#ifdef _MSC_VER
+    result[0] = _umul128(a, b, &result[1]); // returns the low 64 bits, stores the high 64 bits
+#else
+    unsigned __int128 tmp = a;
+    tmp *= b;
+    std::memcpy(&result, &tmp, sizeof(u128)); // NOTE(review): assumes little-endian - confirm
+#endif
+    return result;
+}
+
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
+    u64 quotient = dividend[0] / divisor;
+    u64 rest = dividend[0] % divisor;
+    if (dividend[1] != 0) {
+        // dividend[1] / divisor is dropped on purpose: scaled by 2^64 it cannot fit in a u64.
+        // Only (dividend[1] % divisor) * 2^64 is divided, 32 bits at a time (long division).
+        const u64 upper = (dividend[1] % divisor) << 32;
+        const u64 lower = (upper % divisor) << 32;
+        quotient += ((upper / divisor) << 32) + (lower / divisor);
+        rest += lower % divisor;
+        if (rest >= divisor) {
+            ++quotient;
+            rest -= divisor;
+        }
+    }
+    return {quotient, rest};
+}
+
+} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..a3be2a2cb
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,19 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <utility>
+#include "common/common_types.h"
+
+namespace Common {
+
+// Multiplies two u64 values and returns the full, untruncated product as a u128.
+u128 Multiply64Into128(u64 a, u64 b);
+
+// Divides a u128 by a u32 and returns {quotient, remainder} as two u64 values. Quotient bits
+// above the low 64 are discarded, so the quotient is only exact when it fits in a u64.
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+
+} // namespace Common
diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp
new file mode 100644
index 000000000..60a35c67c
--- /dev/null
+++ b/src/common/zstd_compression.cpp
@@ -0,0 +1,53 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <zstd.h>
+
+#include "common/assert.h"
+#include "common/zstd_compression.h"
+
+namespace Common::Compression {
+
+std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) {
+    // Levels outside [1, ZSTD_maxCLevel()] are clamped into that range instead of rejected.
+    const s32 level = std::clamp(compression_level, 1, ZSTD_maxCLevel());
+
+    // Allocate the worst-case output size up front so a single ZSTD_compress call suffices.
+    std::vector<u8> output(ZSTD_compressBound(source_size));
+    const std::size_t written =
+        ZSTD_compress(output.data(), output.size(), source, source_size, level);
+
+    // ZSTD reports failure through the returned size; hand the caller an empty vector then.
+    if (ZSTD_isError(written)) {
+        return {};
+    }
+
+    // Drop the unused tail of the worst-case buffer.
+    output.resize(written);
+    return output;
+}
+
+std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size) {
+    return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT); // fixed default level
+}
+
+std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed) {
+    // The output buffer is sized from the length ZSTD_getDecompressedSize reports for the input.
+    const std::size_t expected = ZSTD_getDecompressedSize(compressed.data(), compressed.size());
+    std::vector<u8> output(expected);
+
+    const std::size_t produced =
+        ZSTD_decompress(output.data(), output.size(), compressed.data(), compressed.size());
+
+    // Success requires no ZSTD error and exactly the announced number of bytes.
+    if (ZSTD_isError(produced) || produced != expected) {
+        return {};
+    }
+    return output;
+}
+
+} // namespace Common::Compression
diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h
new file mode 100644
index 000000000..e0a64b035
--- /dev/null
+++ b/src/common/zstd_compression.h
@@ -0,0 +1,42 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+
+namespace Common::Compression {
+
+/**
+ * Compresses a source memory region with Zstandard and returns the compressed data in a vector.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ * @param compression_level the compression level to use; clamped to [1, ZSTD_maxCLevel()].
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level);
+
+/**
+ * Compresses a source memory region with Zstandard with the default compression level and returns
+ * the compressed data in a vector.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size);
+
+/**
+ * Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector.
+ *
+ * @param compressed the compressed source memory region.
+ *
+ * @return the decompressed data, or an empty vector if decompression failed.
+ */
+std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed);
+
+} // namespace Common::Compression
+\ No newline at end of file
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 8ccb2d5f0..c59107102 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -31,6 +31,8 @@ add_library(core STATIC
     file_sys/bis_factory.h
     file_sys/card_image.cpp
     file_sys/card_image.h
+    file_sys/cheat_engine.cpp
+    file_sys/cheat_engine.h
     file_sys/content_archive.cpp
     file_sys/content_archive.h
     file_sys/control_metadata.cpp
@@ -68,6 +70,8 @@ add_library(core STATIC
     file_sys/system_archive/ng_word.h
     file_sys/system_archive/system_archive.cpp
     file_sys/system_archive/system_archive.h
+    file_sys/system_archive/system_version.cpp
+    file_sys/system_archive/system_version.h
     file_sys/vfs.cpp
     file_sys/vfs.h
     file_sys/vfs_concat.cpp
@@ -107,6 +111,8 @@ add_library(core STATIC
     hle/kernel/client_port.h
     hle/kernel/client_session.cpp
     hle/kernel/client_session.h
+    hle/kernel/code_set.cpp
+    hle/kernel/code_set.h
     hle/kernel/errors.h
     hle/kernel/handle_table.cpp
     hle/kernel/handle_table.h
@@ -140,6 +146,8 @@ add_library(core STATIC
     hle/kernel/svc_wrap.h
     hle/kernel/thread.cpp
     hle/kernel/thread.h
+    hle/kernel/transfer_memory.cpp
+    hle/kernel/transfer_memory.h
     hle/kernel/vm_manager.cpp
     hle/kernel/vm_manager.h
     hle/kernel/wait_object.cpp
@@ -419,8 +427,6 @@ add_library(core STATIC
     loader/deconstructed_rom_directory.h
     loader/elf.cpp
     loader/elf.h
-    loader/linker.cpp
-    loader/linker.h
     loader/loader.cpp
     loader/loader.h
     loader/nax.cpp
@@ -437,8 +443,6 @@ add_library(core STATIC
     loader/xci.h
     memory.cpp
     memory.h
-    memory_hook.cpp
-    memory_hook.h
     memory_setup.h
     perf_stats.cpp
     perf_stats.h
@@ -454,7 +458,7 @@ add_library(core STATIC
 create_target_directory_groups(core)
 
 target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
-target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt lz4_static mbedtls opus unicorn open_source_archives)
+target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt mbedtls opus unicorn open_source_archives)
 if (ENABLE_WEB_SERVICE)
     target_compile_definitions(core PRIVATE -DENABLE_WEB_SERVICE)
     target_link_libraries(core PRIVATE web_service)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 9b7ca4030..49145911b 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
@@ -25,7 +26,6 @@ using Vector = Dynarmic::A64::Vector;
 class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks {
 public:
     explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {}
-    ~ARM_Dynarmic_Callbacks() = default;
 
     u8 MemoryRead8(u64 vaddr) override {
         return Memory::Read8(vaddr);
@@ -119,7 +119,7 @@ public:
         return std::max(parent.core_timing.GetDowncount(), 0);
     }
     u64 GetCNTPCT() override {
-        return parent.core_timing.GetTicks();
+        return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
     }
 
     ARM_Dynarmic& parent;
@@ -151,7 +151,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
     config.tpidr_el0 = &cb->tpidr_el0;
     config.dczid_el0 = 4;
     config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = 19200000; // Value from fusee.
+    config.cntfrq_el0 = Timing::CNTFREQ;
 
     // Unpredictable instructions
     config.define_unpredictable_behaviour = true;
@@ -163,7 +163,6 @@ MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)
 
 void ARM_Dynarmic::Run() {
     MICROPROFILE_SCOPE(ARM_Jit_Dynarmic);
-    ASSERT(Memory::GetCurrentPageTable() == current_page_table);
 
     jit->Run();
 }
@@ -278,7 +277,6 @@ void ARM_Dynarmic::ClearExclusiveState() {
 
 void ARM_Dynarmic::PageTableChanged() {
     jit = MakeJit();
-    current_page_table = Memory::GetCurrentPageTable();
 }
 
 DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 6cc458296..d867c2a50 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,10 +12,6 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 
-namespace Memory {
-struct PageTable;
-}
-
 namespace Core::Timing {
 class CoreTiming;
 }
@@ -29,7 +25,7 @@ class ARM_Dynarmic final : public ARM_Interface {
 public:
     ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
                  std::size_t core_index);
-    ~ARM_Dynarmic();
+    ~ARM_Dynarmic() override;
 
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
@@ -69,14 +65,12 @@ private:
     std::size_t core_index;
     Timing::CoreTiming& core_timing;
     DynarmicExclusiveMonitor& exclusive_monitor;
-
-    Memory::PageTable* current_page_table = nullptr;
 };
 
 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
 public:
     explicit DynarmicExclusiveMonitor(std::size_t core_count);
-    ~DynarmicExclusiveMonitor();
+    ~DynarmicExclusiveMonitor() override;
 
     void SetExclusive(std::size_t core_index, VAddr addr) override;
     void ClearExclusive() override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index a542a098b..27309280c 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -192,12 +192,13 @@ void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
     core_timing.AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
-        if (last_bkpt_hit) {
+        if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
             uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
         }
+
         Kernel::Thread* thread = Kernel::GetCurrentThread();
         SaveContext(thread->GetContext());
-        if (last_bkpt_hit || GDBStub::GetCpuStepFlag()) {
+        if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
             last_bkpt_hit = false;
             GDBStub::Break();
             GDBStub::SendTrap(thread, 5);
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index dbd6955ea..1e44f0736 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -18,7 +18,7 @@ namespace Core {
 class ARM_Unicorn final : public ARM_Interface {
 public:
     explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
-    ~ARM_Unicorn();
+    ~ARM_Unicorn() override;
 
     void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
@@ -50,7 +50,7 @@ private:
     uc_engine* uc{};
     Timing::CoreTiming& core_timing;
     GDBStub::BreakpointAddress last_bkpt{};
-    bool last_bkpt_hit;
+    bool last_bkpt_hit = false;
 };
 
 } // namespace Core
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 89b3fb418..4fe77c25b 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -32,6 +32,7 @@
 #include "core/perf_stats.h"
 #include "core/settings.h"
 #include "core/telemetry_session.h"
+#include "file_sys/cheat_engine.h"
 #include "frontend/applets/profile_select.h"
 #include "frontend/applets/software_keyboard.h"
 #include "frontend/applets/web_browser.h"
@@ -205,6 +206,7 @@ struct System::Impl {
         GDBStub::Shutdown();
         Service::Shutdown();
         service_manager.reset();
+        cheat_engine.reset();
         telemetry_session.reset();
         gpu_core.reset();
 
@@ -255,6 +257,8 @@ struct System::Impl {
     CpuCoreManager cpu_core_manager;
     bool is_powered_on = false;
 
+    std::unique_ptr<FileSys::CheatEngine> cheat_engine;
+
     /// Frontend applets
     std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector;
     std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard;
@@ -453,6 +457,13 @@ Tegra::DebugContext* System::GetGPUDebugContext() const {
     return impl->debug_context.get();
 }
 
+void System::RegisterCheatList(const std::vector<FileSys::CheatList>& list,
+                               const std::string& build_id, VAddr code_region_start,
+                               VAddr code_region_end) { // replaces any engine registered earlier
+    impl->cheat_engine = std::make_unique<FileSys::CheatEngine>(*this, list, build_id,
+                                                                code_region_start, code_region_end);
+}
+
 void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
     impl->virtual_filesystem = std::move(vfs);
 }
diff --git a/src/core/core.h b/src/core/core.h
index ba76a41d8..4d83b93cc 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -20,6 +20,7 @@ class WebBrowserApplet;
 } // namespace Core::Frontend
 
 namespace FileSys {
+class CheatList;
 class VfsFilesystem;
 } // namespace FileSys
 
@@ -253,6 +254,9 @@ public:
 
     std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
 
+    void RegisterCheatList(const std::vector<FileSys::CheatList>& list, const std::string& build_id,
+                           VAddr code_region_start, VAddr code_region_end);
+
     void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet);
 
     const Frontend::ProfileSelectApplet& GetProfileSelector() const;
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 1eefed6d0..e75741db0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -22,7 +22,7 @@
 namespace Core {
 
 void CpuBarrier::NotifyEnd() {
-    std::unique_lock<std::mutex> lock(mutex);
+    std::unique_lock lock{mutex};
     end = true;
     condition.notify_all();
 }
@@ -34,7 +34,7 @@ bool CpuBarrier::Rendezvous() {
     }
 
     if (!end) {
-        std::unique_lock<std::mutex> lock(mutex);
+        std::unique_lock lock{mutex};
 
         --cores_waiting;
         if (!cores_waiting) {
@@ -131,7 +131,7 @@ void Cpu::Reschedule() {
 
     reschedule_pending = false;
     // Lock the global kernel mutex when we manipulate the HLE state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     scheduler->Reschedule();
 }
 
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index a0dd5db24..41adb2302 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -186,7 +186,7 @@ void CoreTiming::Advance() {
         Event evt = std::move(event_queue.front());
         std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
         event_queue.pop_back();
-        evt.type->callback(evt.userdata, static_cast<int>(global_timer - evt.time));
+        evt.type->callback(evt.userdata, global_timer - evt.time);
     }
 
     is_global_timer_sane = false;
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 59163bae1..9d2efde37 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -15,7 +15,7 @@
 namespace Core::Timing {
 
 /// A callback that may be scheduled for a particular core timing event.
-using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
+using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
 
 /// Contains the characteristics of a particular event.
 struct EventType {
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 88ff70233..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,6 +7,7 @@
 #include <cinttypes>
 #include <limits>
 #include "common/logging/log.h"
+#include "common/uint128.h"
 
 namespace Core::Timing {
 
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
     return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
 }
 
+u64 CpuCyclesToClockCycles(u64 ticks) { // presumably ticks * CNTFREQ / BASE_CLOCK_RATE
+    const u128 wide_product = Common::Multiply64Into128(ticks, CNTFREQ);
+    return Common::Divide128On32(wide_product, static_cast<u32>(BASE_CLOCK_RATE)).first;
+}
+
 } // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 513cfac1b..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -11,6 +11,7 @@ namespace Core::Timing {
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
 constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;           // Value from fusee.
 
 inline s64 msToCycles(int ms) {
     // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
     return cycles * 1000 / BASE_CLOCK_RATE;
 }
 
+u64 CpuCyclesToClockCycles(u64 ticks);
+
 } // namespace Core::Timing
diff --git a/src/core/file_sys/cheat_engine.cpp b/src/core/file_sys/cheat_engine.cpp
new file mode 100644
index 000000000..b06c2f20a
--- /dev/null
+++ b/src/core/file_sys/cheat_engine.cpp
@@ -0,0 +1,492 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <locale>
+#include "common/hex_util.h"
+#include "common/microprofile.h"
+#include "common/swap.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
+#include "core/file_sys/cheat_engine.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/service/hid/controllers/npad.h"
+#include "core/hle/service/hid/hid.h"
+#include "core/hle/service/sm/sm.h"
+
+namespace FileSys {
+
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60); // 1/60 s
+constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; // low 26 bits of the press state take part in the test
+
+u64 Cheat::Address() const {
+    u64 first_qword; // first 8 raw bytes; the low 40 bits of the swapped value are returned
+    std::memcpy(&first_qword, raw.data(), sizeof(first_qword));
+    return Common::swap64(first_qword) & 0xFFFFFFFFFF;
+}
+
+u64 Cheat::ValueWidth(u64 offset) const { // Value() at `offset`, using this code's own width field
+    return Value(offset, width);
+}
+
+u64 Cheat::Value(u64 offset, u64 width) const {
+    u64 out; // 8 raw bytes starting at `offset` (callers pass 4 or 8), byte-swapped below
+    std::memcpy(&out, raw.data() + offset, sizeof(u64));
+    out = Common::swap64(out);
+    if (width >= sizeof(u64)) // nothing to mask; also keeps the shift below under 64 bits
+        return out;
+    return out & ((1ull << (width * CHAR_BIT)) - 1);
+}
+
+u32 Cheat::KeypadValue() const {
+    u32 first_word; // first 4 raw bytes; the mask clears the top 4 bits of the swapped word
+    std::memcpy(&first_word, raw.data(), sizeof(first_word));
+    return Common::swap32(first_word) & 0x0FFFFFFF;
+}
+
+void CheatList::SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end,
+                                    VAddr heap_end, MemoryWriter writer, MemoryReader reader) {
+    main_region_begin = main_begin; // the four bounds are what SanitizeAddress checks against
+    main_region_end = main_end;
+    heap_region_begin = heap_begin;
+    heap_region_end = heap_end;
+    this->writer = writer; // this-> is needed here: the parameters shadow the members
+    this->reader = reader;
+}
+
+MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70));
+
+void CheatList::Execute() {
+    MICROPROFILE_SCOPE(Cheat_Engine);
+
+    scratch.fill(0); // every pass starts with all scratch registers cleared
+
+    // Master blocks run before standard ones; in_standard records which list is active so
+    // that Loop can look the running block up again.
+    for (const bool standard : {false, true}) {
+        in_standard = standard;
+        const ProgramSegment& segment = standard ? standard_list : master_list;
+        for (std::size_t block = 0; block < segment.size(); ++block) {
+            LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", block,
+                      segment[block].first);
+            current_block = block;
+            ExecuteBlock(segment[block].second);
+        }
+    }
+}
+
+CheatList::CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard)
+    : master_list{std::move(master)}, standard_list{std::move(standard)}, system{&system_} {} // keeps a pointer to system_, not a copy
+
+bool CheatList::EvaluateConditional(const Cheat& cheat) const {
+    using ComparisonFunction = bool (*)(u64, u64); // (value in memory, immediate) -> passes?
+    constexpr std::array<ComparisonFunction, 6> comparison_functions{
+        [](u64 a, u64 b) { return a > b; },  [](u64 a, u64 b) { return a >= b; },
+        [](u64 a, u64 b) { return a < b; },  [](u64 a, u64 b) { return a <= b; },
+        [](u64 a, u64 b) { return a == b; }, [](u64 a, u64 b) { return a != b; },
+    };
+    // Input conditionals pass when any button in the code's mask is set in the NPad press state.
+    if (cheat.type == CodeType::ConditionalInput) {
+        const auto applet_resource =
+            system->ServiceManager().GetService<Service::HID::Hid>("hid")->GetAppletResource();
+        if (applet_resource == nullptr) {
+            LOG_WARNING(
+                Common_Filesystem,
+                "Attempted to evaluate input conditional, but applet resource is not initialized!");
+            return false;
+        }
+
+        const auto press_state =
+            applet_resource
+                ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad)
+                .GetAndResetPressState();
+        return ((press_state & cheat.KeypadValue()) & KEYPAD_BITMASK) != 0;
+    }
+    // Memory conditionals compare the value at the target address against the immediate.
+    ASSERT(cheat.type == CodeType::Conditional);
+    const auto offset =
+        cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
+    const std::size_t op = static_cast<u8>(cheat.comparison_op.Value());
+    ASSERT(op >= 1 && op <= comparison_functions.size()); // ComparisonOp values start at 1
+    auto* function = comparison_functions[op - 1];        // so table entry i serves value i + 1
+    const auto addr = cheat.Address() + offset;
+
+    return function(reader(cheat.width, SanitizeAddress(addr)), cheat.ValueWidth(8));
+}
+
+void CheatList::ProcessBlockPairs(const Block& block) {
+    block_pairs.clear(); // rebuilt for each block: opener index -> matching terminator index
+    // Indices of openers that are still waiting for their terminator, innermost last.
+    std::vector<u64> open_scopes;
+
+    for (std::size_t i = 0; i < block.size(); ++i) {
+        const auto& cheat = block[i];
+        bool is_opener = false;
+        bool is_terminator = false;
+        switch (cheat.type) {
+        case CodeType::Conditional:
+        case CodeType::ConditionalInput:
+            is_opener = true;
+            break;
+        case CodeType::EndConditional:
+            is_terminator = true;
+            break;
+        case CodeType::Loop: // one opcode for both ends; a non-zero end_of_loop is the terminator
+            is_terminator = cheat.end_of_loop.Value() != 0;
+            is_opener = !is_terminator;
+            break;
+        default: // every other code neither opens nor closes a scope
+            break;
+        }
+
+        if (is_opener) {
+            open_scopes.push_back(i);
+        } else if (is_terminator && !open_scopes.empty()) {
+            block_pairs.insert_or_assign(open_scopes.back(), i);
+            open_scopes.pop_back();
+        } else if (is_terminator) { // malformed input: previously threw std::out_of_range
+            LOG_WARNING(Common_Filesystem, "Ignoring unmatched end-of-block code at index {}", i);
+        }
+    }
+}
+
+void CheatList::WriteImmediate(const Cheat& cheat) {
+    const auto offset =
+        cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
+    const auto& register_3 = scratch.at(cheat.register_3);
+    // Target = region base + encoded address + current value of the selected register.
+    const auto addr = cheat.Address() + offset + register_3;
+    LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}",
+              cheat.ValueWidth(8), addr); // argument order now matches the format string
+    writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(8));
+}
+
+void CheatList::BeginConditional(const Cheat& cheat) {
+    // A passing condition needs no action: execution continues with the next code.
+    if (!EvaluateConditional(cheat)) {
+        // Park just before the paired terminator; ExecuteBlock's increment then lands on it.
+        const auto iter = block_pairs.find(current_index);
+        ASSERT(iter != block_pairs.end());
+        current_index = iter->second - 1;
+    }
+}
+
+void CheatList::EndConditional(const Cheat& cheat) { // marker only: logs and changes no state
+    LOG_DEBUG(Common_Filesystem, "Ending conditional block.");
+}
+
+void CheatList::Loop(const Cheat& cheat) {
+    ASSERT(!cheat.end_of_loop.Value()); // only the opening half of a loop pair is executable
+    auto& register_3 = scratch.at(cheat.register_3);
+    const auto iter = block_pairs.find(current_index);
+    ASSERT(iter != block_pairs.end());
+    ASSERT(iter->first < iter->second);
+    const auto& body = (in_standard ? standard_list : master_list)[current_block].second;
+
+    // NOTE(review): counts down from the immediate to 0 inclusive — confirm the pass count.
+    const s32 initial_value = static_cast<s32>(cheat.Value(4, sizeof(s32)));
+    for (s32 i = initial_value; i >= 0; --i) {
+        register_3 = static_cast<u64>(i);
+        for (std::size_t c = iter->first + 1; c < iter->second; ++c) {
+            current_index = c;
+            ExecuteSingleCheat(body[c]);
+            c = current_index; // follow jumps made by nested conditionals and inner loops
+        }
+    }
+
+    current_index = iter->second;
+}
+
+void CheatList::LoadImmediate(const Cheat& cheat) {
+    // The 8-byte immediate is read from byte offset 4 of the raw code.
+    const u64 immediate = cheat.Value(4, 8);
+    LOG_DEBUG(Common_Filesystem, "setting register={:01X} equal to value={:016X}", cheat.register_3,
+              immediate);
+    scratch.at(cheat.register_3) = immediate;
+}
+
+void CheatList::LoadIndexed(const Cheat& cheat) {
+    auto& target = scratch.at(cheat.register_3);
+    // Base is the register's current value when load_from_register is set, else the region start.
+    const u64 region =
+        cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
+    const u64 addr = (cheat.load_from_register.Value() ? target : region) + cheat.Address();
+    LOG_DEBUG(Common_Filesystem, "writing indexed value to register={:01X}, addr={:016X}",
+              cheat.register_3, addr);
+    target = reader(cheat.width, SanitizeAddress(addr));
+}
+
+void CheatList::StoreIndexed(const Cheat& cheat) { // store V at [R (+ G if F)], then R += T if I
+    auto& register_3 = scratch.at(cheat.register_3); // R: holds the base address
+    const auto addr =
+        register_3 + (cheat.add_additional_register.Value() ? scratch.at(cheat.register_6) : 0);
+    LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}",
+              cheat.Value(4, cheat.width), addr);
+    writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(4));
+    register_3 += cheat.increment_register.Value() != 0 ? cheat.width.Value() : 0;
+}
+
+void CheatList::RegisterArithmetic(const Cheat& cheat) {
+    using ArithmeticFunction = u64 (*)(u64, u64); // (register value, immediate) -> new value
+    constexpr std::array<ArithmeticFunction, 5> arithmetic_functions{
+        [](u64 a, u64 b) { return a + b; },  [](u64 a, u64 b) { return a - b; },
+        [](u64 a, u64 b) { return a * b; },  [](u64 a, u64 b) { return b < 64 ? a << b : 0; },
+        [](u64 a, u64 b) { return b < 64 ? a >> b : 0; }, // a shift by >= 64 bits is undefined
+    };
+    // True when the exact result is lost; each check must itself avoid wrap-around and UB.
+    using ArithmeticOverflowCheck = bool (*)(u64, u64);
+    constexpr std::array<ArithmeticOverflowCheck, 5> arithmetic_overflow_checks{
+        [](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() - b); },           // a + b
+        [](u64 a, u64 b) { return a < b; },                                               // a - b
+        [](u64 a, u64 b) { return b != 0 && a > (std::numeric_limits<u64>::max() / b); }, // a * b
+        [](u64 a, u64 b) { return b != 0 && (b >= 64 || (a >> (64 - b)) != 0); },         // a << b
+        [](u64 a, u64 b) { return b >= 64 || (a & ((1ull << b) - 1)) != 0; },             // a >> b
+    };
+
+    static_assert(sizeof(arithmetic_functions) == sizeof(arithmetic_overflow_checks),
+                  "Missing or have extra arithmetic overflow checks compared to functions!");
+    // Both tables are indexed by the raw ArithmeticOp value (Add = 0 ... RShift = 4).
+    auto& register_3 = scratch.at(cheat.register_3);
+
+    ASSERT(static_cast<u8>(cheat.arithmetic_op.Value()) < 5);
+    auto* function = arithmetic_functions[static_cast<u8>(cheat.arithmetic_op.Value())];
+    auto* overflow_function =
+        arithmetic_overflow_checks[static_cast<u8>(cheat.arithmetic_op.Value())];
+    LOG_DEBUG(Common_Filesystem, "performing arithmetic with register={:01X}, value={:016X}",
+              cheat.register_3, cheat.ValueWidth(4));
+    // Overflow is only reported; the operation is still carried out below.
+    if (overflow_function(register_3, cheat.ValueWidth(4))) {
+        LOG_WARNING(Common_Filesystem,
+                    "overflow will occur when performing arithmetic operation={:02X} with operands "
+                    "a={:016X}, b={:016X}!",
+                    static_cast<u8>(cheat.arithmetic_op.Value()), register_3, cheat.ValueWidth(4));
+    }
+
+    register_3 = function(register_3, cheat.ValueWidth(4));
+}
+
+void CheatList::BeginConditionalInput(const Cheat& cheat) {
+    // Same flow as BeginConditional: continue on success, otherwise skip to the terminator.
+    if (!EvaluateConditional(cheat)) {
+        const auto iter = block_pairs.find(current_index);
+        ASSERT(iter != block_pairs.end());
+        current_index = iter->second - 1; // ExecuteBlock's increment then lands on the terminator
+    }
+}
+
+VAddr CheatList::SanitizeAddress(VAddr in) const {
+    const bool inside_main = in >= main_region_begin && in < main_region_end;
+    const bool inside_heap = in >= heap_region_begin && in < heap_region_end;
+    if (inside_main || inside_heap) {
+        return in; // addresses inside either registered region pass through unchanged
+    }
+    LOG_ERROR(Common_Filesystem,
+              "Cheat attempting to access memory at invalid address={:016X}, if this persists, "
+              "the cheat may be incorrect. However, this may be normal early in execution if "
+              "the game has not properly set up yet.",
+              in);
+    return 0; ///< Invalid addresses will hard crash
+}
+
+void CheatList::ExecuteSingleCheat(const Cheat& cheat) {
+    using CheatOperationFunction = void (CheatList::*)(const Cheat&);
+    constexpr std::array<CheatOperationFunction, 9> cheat_operation_functions{
+        &CheatList::WriteImmediate,        &CheatList::BeginConditional,
+        &CheatList::EndConditional,        &CheatList::Loop,
+        &CheatList::LoadImmediate,         &CheatList::LoadIndexed,
+        &CheatList::StoreIndexed,          &CheatList::RegisterArithmetic,
+        &CheatList::BeginConditionalInput,
+    };
+    // Handlers are listed in CodeType order, so the opcode value doubles as the table index.
+    const auto index = static_cast<u8>(cheat.type.Value());
+    ASSERT(index < cheat_operation_functions.size()); // element count; sizeof() gave bytes
+    const auto op = cheat_operation_functions[index];
+    (this->*op)(cheat);
+}
+
+void CheatList::ExecuteBlock(const Block& block) {
+    encountered_loops.clear();
+    ProcessBlockPairs(block); // pair every opener with its terminator before running anything
+    std::size_t position = 0;
+    while (position < block.size()) {
+        current_index = position;
+        ExecuteSingleCheat(block[position]);
+        position = current_index + 1; // handlers may have moved current_index forward
+    }
+}
+
+CheatParser::~CheatParser() = default;
+// Lets parser subclasses reach CheatList's private constructor (CheatParser is its friend).
+CheatList CheatParser::MakeCheatList(const Core::System& system, CheatList::ProgramSegment master,
+                                     CheatList::ProgramSegment standard) const {
+    return {system, std::move(master), std::move(standard)};
+}
+
+TextCheatParser::~TextCheatParser() = default;
+
+CheatList TextCheatParser::Parse(const Core::System& system, const std::vector<u8>& data) const {
+    std::stringstream ss;
+    ss.write(reinterpret_cast<const char*>(data.data()), data.size());
+
+    std::vector<std::string> lines;
+    std::string stream_line;
+    while (std::getline(ss, stream_line)) {
+        // Remove a trailing \r
+        if (!stream_line.empty() && stream_line.back() == '\r')
+            stream_line.pop_back();
+        lines.push_back(std::move(stream_line));
+    }
+
+    // A line opening with '{' names a master block, one opening with '[' a standard block.
+    const auto is_header = [](const std::string& text) {
+        return !text.empty() && (text[0] == '[' || text[0] == '{');
+    };
+
+    CheatList::ProgramSegment master_list;
+    CheatList::ProgramSegment standard_list;
+
+    for (std::size_t i = 0; i < lines.size(); ++i) {
+        const std::string& header = lines[i]; // bound by reference: no per-line copy
+        if (!is_header(header))
+            continue;
+
+        const auto master = header[0] == '{';
+        const auto begin = master ? header.find('{') : header.find('[');
+        const auto end = master ? header.rfind('}') : header.rfind(']');
+        ASSERT(begin != std::string::npos && end != std::string::npos);
+
+        std::string patch_name{header.begin() + begin + 1, header.begin() + end};
+        CheatList::Block block{};
+
+        // Collect codes up to the next header; lines shorter than one 8-digit word are ignored.
+        for (; i + 1 < lines.size() && !is_header(lines[i + 1]); ++i) {
+            const std::string& line = lines[i + 1];
+            if (line.size() < 8)
+                continue;
+            Cheat out{};
+            out.raw = ParseSingleLineCheat(line);
+            block.push_back(out);
+        }
+
+        auto& segment = master ? master_list : standard_list;
+        segment.emplace_back(std::move(patch_name), std::move(block)); // hand over, do not copy
+    }
+
+    return MakeCheatList(system, std::move(master_list), std::move(standard_list));
+}
+
+std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line) const {
+    std::array<u8, 16> out{};
+    // Accepts up to four 8-digit hex words separated by single spaces; missing words stay zero.
+    if (line.size() < 8)
+        return out;
+    // Word 1 -> bytes 0-3.
+    const auto word1 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data(), 8});
+    std::memcpy(out.data(), word1.data(), sizeof(u32));
+    // Stop here unless a space and a complete second word follow.
+    if (line.size() < 17 || line[8] != ' ')
+        return out;
+    // Word 2 -> bytes 4-7.
+    const auto word2 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 9, 8});
+    std::memcpy(out.data() + sizeof(u32), word2.data(), sizeof(u32));
+    // The code type is the high nibble of byte 0.
+    if (line.size() < 26 || line[17] != ' ') {
+        // Only two words: for these types move word 2 into bytes 8-11 and zero bytes 4-7.
+        const auto type = static_cast<CodeType>((out[0] & 0xF0) >> 4);
+        if (type == CodeType::Loop || type == CodeType::LoadImmediate ||
+            type == CodeType::StoreIndexed || type == CodeType::RegisterArithmetic) {
+            std::memcpy(out.data() + 8, out.data() + 4, sizeof(u32));
+            std::memset(out.data() + 4, 0, sizeof(u32));
+        }
+
+        return out;
+    }
+    // Word 3 -> bytes 8-11.
+    const auto word3 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 18, 8});
+    std::memcpy(out.data() + 2 * sizeof(u32), word3.data(), sizeof(u32));
+
+    if (line.size() < 35 || line[26] != ' ') {
+        // Only three words: for these types move word 3 into bytes 12-15 and zero bytes 8-11.
+        const auto type = static_cast<CodeType>((out[0] & 0xF0) >> 4);
+        if (type == CodeType::WriteImmediate || type == CodeType::Conditional) {
+            std::memcpy(out.data() + 12, out.data() + 8, sizeof(u32));
+            std::memset(out.data() + 8, 0, sizeof(u32));
+        }
+
+        return out;
+    }
+    // Word 4 -> bytes 12-15.
+    const auto word4 = Common::HexStringToArray<sizeof(u32)>(std::string_view{line.data() + 27, 8});
+    std::memcpy(out.data() + 3 * sizeof(u32), word4.data(), sizeof(u32));
+
+    return out;
+}
+
+namespace {
+// Reads `width` bytes (1, 2, 4 or 8) from `addr` through Memory::ReadN and widens to u64.
+// Any other width reaches UNREACHABLE() and yields 0.
+u64 MemoryReadImpl(u32 width, VAddr addr) {
+    if (width == 1) {
+        return Memory::Read8(addr);
+    }
+    if (width == 2) {
+        return Memory::Read16(addr);
+    }
+    if (width == 4) {
+        return Memory::Read32(addr);
+    }
+    if (width == 8) {
+        return Memory::Read64(addr);
+    }
+    UNREACHABLE();
+    return 0;
+}
+
+// Writes the low `width` bytes (1, 2, 4 or 8) of `value` to `addr` through Memory::WriteN.
+// Any other width reaches UNREACHABLE() and writes nothing.
+void MemoryWriteImpl(u32 width, VAddr addr, u64 value) {
+    if (width == 1) {
+        Memory::Write8(addr, static_cast<u8>(value));
+    } else if (width == 2) {
+        Memory::Write16(addr, static_cast<u16>(value));
+    } else if (width == 4) {
+        Memory::Write32(addr, static_cast<u32>(value));
+    } else if (width == 8) {
+        Memory::Write64(addr, value);
+    } else {
+        UNREACHABLE();
+    }
+}
+} // Anonymous namespace
+
+CheatEngine::CheatEngine(Core::System& system, std::vector<CheatList> cheats_,
+                         const std::string& build_id, VAddr code_region_start,
+                         VAddr code_region_end)
+    : cheats{std::move(cheats_)}, core_timing{system.CoreTiming()} {
+    event = core_timing.RegisterEvent( // the event name is made unique per build_id
+        "CheatEngine::FrameCallback::" + build_id,
+        [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
+    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);
+    // Main region = the supplied code range; heap bounds come from the current process.
+    const auto& vm_manager = system.CurrentProcess()->VMManager();
+    for (auto& list : this->cheats) {
+        list.SetMemoryParameters(code_region_start, vm_manager.GetHeapRegionBaseAddress(),
+                                 code_region_end, vm_manager.GetHeapRegionEndAddress(),
+                                 &MemoryWriteImpl, &MemoryReadImpl);
+    }
+}
+
+CheatEngine::~CheatEngine() { // the registered callback captures this object, so unschedule it
+    core_timing.UnscheduleEvent(event, 0);
+}
+
+void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
+    for (auto& list : cheats) {
+        list.Execute();
+    }
+    // Re-arm for the next run, shortened by however late this one fired.
+    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event);
+}
+
+} // namespace FileSys
diff --git a/src/core/file_sys/cheat_engine.h b/src/core/file_sys/cheat_engine.h
new file mode 100644
index 000000000..ac22a82cb
--- /dev/null
+++ b/src/core/file_sys/cheat_engine.h
@@ -0,0 +1,234 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <map>
+#include <set>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace Core::Timing {
+class CoreTiming;
+struct EventType;
+} // namespace Core::Timing
+
+namespace FileSys {
+
+enum class CodeType : u32 { // opcode: the first hex digit of a code (Cheat::type, bits 4-7)
+    // 0TMR00AA AAAAAAAA YYYYYYYY YYYYYYYY
+    // Writes a T sized value Y to the address A added to the value of register R in memory domain M
+    WriteImmediate = 0,
+
+    // 1TMC00AA AAAAAAAA YYYYYYYY YYYYYYYY
+    // Compares the T sized value Y to the value at address A in memory domain M using the
+    // conditional function C. If success, continues execution. If failure, jumps to the matching
+    // EndConditional statement.
+    Conditional = 1,
+
+    // 20000000
+    // Terminates a Conditional or ConditionalInput block.
+    EndConditional = 2,
+
+    // 300R0000 VVVVVVVV
+    // Starts looping V times, storing the current count in register R.
+    // Loop block is terminated with a matching 310R0000.
+    Loop = 3,
+
+    // 400R0000 VVVVVVVV VVVVVVVV
+    // Sets the value of register R to the value V.
+    LoadImmediate = 4,
+
+    // 5TMRI0AA AAAAAAAA
+    // Sets the value of register R to the value of width T at address A in memory domain M, with
+    // the current value of R added to the address if I == 1.
+    LoadIndexed = 5,
+
+    // 6T0RIFG0 VVVVVVVV VVVVVVVV
+    // Writes the value V of width T to the memory address stored in register R. Adds the value of
+    // register G to the final calculation if F is nonzero. Increments the value of register R by T
+    // after operation if I is nonzero.
+    StoreIndexed = 6,
+
+    // 7T0RA000 VVVVVVVV
+    // Performs the arithmetic operation A on the value in register R and the value V of width T,
+    // storing the result in register R.
+    RegisterArithmetic = 7,
+
+    // 8KKKKKKK
+    // Checks to see if any of the buttons defined by the bitmask K are pressed. If any are,
+    // execution continues. If none are, execution skips to the next EndConditional command.
+    ConditionalInput = 8,
+};
+
+enum class MemoryType : u32 { // the M digit of a code (Cheat::memory_type)
+    // Addressed relative to start of main NSO
+    MainNSO = 0,
+
+    // Addressed relative to start of heap
+    Heap = 1,
+};
+
+enum class ArithmeticOp : u32 { // used directly as an index by CheatList::RegisterArithmetic
+    Add = 0,    // register + value
+    Sub = 1,    // register - value
+    Mult = 2,   // register * value
+    LShift = 3, // register << value
+    RShift = 4, // register >> value
+};
+
+enum class ComparisonOp : u32 { // note: values start at 1, not 0
+    GreaterThan = 1,      // memory value >  immediate
+    GreaterThanEqual = 2, // memory value >= immediate
+    LessThan = 3,         // memory value <  immediate
+    LessThanEqual = 4,    // memory value <= immediate
+    Equal = 5,            // memory value == immediate
+    Inequal = 6,          // memory value != immediate
+};
+
+union Cheat {
+    std::array<u8, 16> raw; // the code as parsed from text: up to four 4-byte words
+
+    BitField<4, 4, CodeType> type; // opcode; selects the handler in ExecuteSingleCheat
+    BitField<0, 4, u32> width; // Can be 1, 2, 4, or 8. Measured in bytes.
+    BitField<0, 4, u32> end_of_loop; // Loop codes: non-zero marks the terminating half
+    BitField<12, 4, MemoryType> memory_type; // chooses the main-NSO or heap base address
+    BitField<8, 4, u32> register_3; // index into CheatList's scratch registers
+    BitField<8, 4, ComparisonOp> comparison_op; // Conditional codes: which comparison to apply
+    BitField<20, 4, u32> load_from_register; // LoadIndexed: base on the register, not the region
+    BitField<20, 4, u32> increment_register; // StoreIndexed: the I flag of 6T0RIFG0
+    BitField<20, 4, ArithmeticOp> arithmetic_op; // RegisterArithmetic: operation selector
+    BitField<16, 4, u32> add_additional_register; // StoreIndexed: add scratch[register_6]
+    BitField<28, 4, u32> register_6; // second scratch register index used by StoreIndexed
+
+    u64 Address() const; // low 40 bits of the first 8 raw bytes after a byte swap
+    u64 ValueWidth(u64 offset) const; // Value(offset, width)
+    u64 Value(u64 offset, u64 width) const; // low `width` bytes of the swapped 8 bytes at offset
+    u32 KeypadValue() const; // low 28 bits of the first 4 raw bytes after a byte swap
+};
+
+class CheatParser;
+
+// Represents a full collection of cheats for a game. The Execute function should be called every
+// interval that all cheats should be executed. Clients should not directly instantiate this class
+// (hence private constructor), they should instead receive an instance from CheatParser, which
+// guarantees the list is always in an acceptable state.
+class CheatList {
+public:
+    friend class CheatParser;
+
+    using Block = std::vector<Cheat>; // the codes of one named cheat, in execution order
+    using ProgramSegment = std::vector<std::pair<std::string, Block>>; // (name, codes) pairs
+
+    // (width in bytes, address, value)
+    using MemoryWriter = void (*)(u32, VAddr, u64);
+    // (width in bytes, address) -> value
+    using MemoryReader = u64 (*)(u32, VAddr);
+
+    void SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end, VAddr heap_end,
+                             MemoryWriter writer, MemoryReader reader);
+
+    void Execute(); // clears the scratch registers, then runs master blocks, then standard blocks
+
+private:
+    CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard);
+
+    void ProcessBlockPairs(const Block& block); // fills block_pairs for `block`
+    void ExecuteSingleCheat(const Cheat& cheat); // dispatches on cheat.type
+
+    void ExecuteBlock(const Block& block);
+
+    bool EvaluateConditional(const Cheat& cheat) const;
+
+    // Individual cheat operations
+    void WriteImmediate(const Cheat& cheat);
+    void BeginConditional(const Cheat& cheat);
+    void EndConditional(const Cheat& cheat);
+    void Loop(const Cheat& cheat);
+    void LoadImmediate(const Cheat& cheat);
+    void LoadIndexed(const Cheat& cheat);
+    void StoreIndexed(const Cheat& cheat);
+    void RegisterArithmetic(const Cheat& cheat);
+    void BeginConditionalInput(const Cheat& cheat);
+
+    VAddr SanitizeAddress(VAddr in) const; // returns 0 for addresses outside both regions
+
+    // Master Codes are defined as codes that cannot be disabled and are run prior to all
+    // others.
+    ProgramSegment master_list;
+    // All other codes
+    ProgramSegment standard_list;
+
+    bool in_standard = false; // true while Execute is running standard_list
+
+    // 16 (0x0-0xF) scratch registers that can be used by cheats
+    std::array<u64, 16> scratch{};
+
+    MemoryWriter writer = nullptr;
+    MemoryReader reader = nullptr;
+
+    u64 main_region_begin{};
+    u64 heap_region_begin{};
+    u64 main_region_end{};
+    u64 heap_region_end{};
+
+    u64 current_block{}; // index of the running block within the active list
+    // The current index of the cheat within the current Block
+    u64 current_index{};
+
+    // Jump table for the running block, rebuilt by ProcessBlockPairs: maps the index of each
+    // conditional or loop opener to the index of the code that terminates it.
+    std::map<u64, u64> block_pairs;
+
+    std::set<u64> encountered_loops; // cleared by ExecuteBlock
+
+    const Core::System* system; // non-owning; used to look up the HID service
+};
+
+// Intermediary class that parses a text file or other disk format for storing cheats into a
+// CheatList object, that can be used for execution.
+class CheatParser {
+public:
+    virtual ~CheatParser();
+    // Builds a CheatList from the raw bytes of a cheat file.
+    virtual CheatList Parse(const Core::System& system, const std::vector<u8>& data) const = 0;
+
+protected:
+    CheatList MakeCheatList(const Core::System& system_, CheatList::ProgramSegment master,
+                            CheatList::ProgramSegment standard) const; // wraps the private ctor
+};
+
+// CheatParser implementation that parses text files
+class TextCheatParser final : public CheatParser {
+public:
+    ~TextCheatParser() override;
+    // '{name}' lines start a master block, '[name]' lines a standard block; code lines follow.
+    CheatList Parse(const Core::System& system, const std::vector<u8>& data) const override;
+
+private:
+    std::array<u8, 16> ParseSingleLineCheat(const std::string& line) const; // hex text -> raw
+};
+
+// Class that encapsulates a CheatList and manages its interaction with memory and CoreTiming
+class CheatEngine final {
+public:
+    CheatEngine(Core::System& system_, std::vector<CheatList> cheats_, const std::string& build_id,
+                VAddr code_region_start, VAddr code_region_end);
+    ~CheatEngine(); // unschedules the frame event
+
+private:
+    void FrameCallback(u64 userdata, s64 cycles_late); // runs every list, then reschedules itself
+
+    std::vector<CheatList> cheats;
+
+    Core::Timing::EventType* event; // returned by CoreTiming::RegisterEvent in the constructor
+    Core::Timing::CoreTiming& core_timing;
+};
+
+} // namespace FileSys
diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h
index 5d4d05c82..15b9e6624 100644
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -24,13 +24,26 @@ namespace FileSys {
 
 union NCASectionHeader;
 
+/// Describes the type of content within an NCA archive.
 enum class NCAContentType : u8 {
+    /// Executable-related data
     Program = 0,
+
+    /// Metadata.
     Meta = 1,
+
+    /// Access control data.
     Control = 2,
+
+    /// Information related to the game manual
+    /// e.g. Legal information, etc.
     Manual = 3,
+
+    /// System data.
     Data = 4,
-    Data_Unknown5 = 5, ///< Seems to be used on some system archives
+
+    /// Data that can be accessed by applications.
+    PublicData = 5,
 };
 
 enum class NCASectionCryptoType : u8 {
diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp
index 83c184750..60ea9ad12 100644
--- a/src/core/file_sys/control_metadata.cpp
+++ b/src/core/file_sys/control_metadata.cpp
@@ -67,7 +67,7 @@ std::string NACP::GetDeveloperName(Language language) const {
 }
 
 u64 NACP::GetTitleId() const {
-    return raw.title_id;
+    return raw.save_data_owner_id;
 }
 
 u64 NACP::GetDLCBaseTitleId() const {
@@ -80,11 +80,11 @@ std::string NACP::GetVersionString() const {
 }
 
 u64 NACP::GetDefaultNormalSaveSize() const {
-    return raw.normal_save_data_size;
+    return raw.user_account_save_data_size;
 }
 
 u64 NACP::GetDefaultJournalSaveSize() const {
-    return raw.journal_sava_data_size;
+    return raw.user_account_save_data_journal_size;
 }
 
 std::vector<u8> NACP::GetRawBytes() const {
diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h
index 7b9cdc910..280710ddf 100644
--- a/src/core/file_sys/control_metadata.h
+++ b/src/core/file_sys/control_metadata.h
@@ -38,23 +38,35 @@ struct RawNACP {
     u8 video_capture_mode;
     bool data_loss_confirmation;
     INSERT_PADDING_BYTES(1);
-    u64_le title_id;
+    u64_le presence_group_id;
     std::array<u8, 0x20> rating_age;
     std::array<char, 0x10> version_string;
     u64_le dlc_base_title_id;
-    u64_le title_id_2;
-    u64_le normal_save_data_size;
-    u64_le journal_sava_data_size;
-    INSERT_PADDING_BYTES(0x18);
-    u64_le product_code;
+    u64_le save_data_owner_id;
+    u64_le user_account_save_data_size;
+    u64_le user_account_save_data_journal_size;
+    u64_le device_save_data_size;
+    u64_le device_save_data_journal_size;
+    u64_le bcat_delivery_cache_storage_size;
+    char application_error_code_category[8];
     std::array<u64_le, 0x8> local_communication;
     u8 logo_type;
     u8 logo_handling;
     bool runtime_add_on_content_install;
     INSERT_PADDING_BYTES(5);
-    u64_le title_id_update;
-    std::array<u8, 0x40> bcat_passphrase;
-    INSERT_PADDING_BYTES(0xEC0);
+    u64_le seed_for_pseudo_device_id;
+    std::array<u8, 0x41> bcat_passphrase;
+    INSERT_PADDING_BYTES(7);
+    u64_le user_account_save_data_max_size;
+    u64_le user_account_save_data_max_journal_size;
+    u64_le device_save_data_max_size;
+    u64_le device_save_data_max_journal_size;
+    u64_le temporary_storage_size;
+    u64_le cache_storage_size;
+    u64_le cache_storage_journal_size;
+    u64_le cache_storage_data_and_journal_max_size;
+    u64_le cache_storage_max_index;
+    INSERT_PADDING_BYTES(0xE70);
 };
 static_assert(sizeof(RawNACP) == 0x4000, "RawNACP has incorrect size.");
 
diff --git a/src/core/file_sys/errors.h b/src/core/file_sys/errors.h
index e4a4ee4ab..bb4654366 100644
--- a/src/core/file_sys/errors.h
+++ b/src/core/file_sys/errors.h
@@ -11,6 +11,9 @@ namespace FileSys {
 constexpr ResultCode ERROR_PATH_NOT_FOUND{ErrorModule::FS, 1};
 constexpr ResultCode ERROR_ENTITY_NOT_FOUND{ErrorModule::FS, 1002};
 constexpr ResultCode ERROR_SD_CARD_NOT_FOUND{ErrorModule::FS, 2001};
+constexpr ResultCode ERROR_OUT_OF_BOUNDS{ErrorModule::FS, 3005};
+constexpr ResultCode ERROR_FAILED_MOUNT_ARCHIVE{ErrorModule::FS, 3223};
+constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::FS, 6001};
 constexpr ResultCode ERROR_INVALID_OFFSET{ErrorModule::FS, 6061};
 constexpr ResultCode ERROR_INVALID_SIZE{ErrorModule::FS, 6062};
 
diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp
index 47b7526c7..d126ae8dd 100644
--- a/src/core/file_sys/fsmitm_romfsbuild.cpp
+++ b/src/core/file_sys/fsmitm_romfsbuild.cpp
@@ -23,6 +23,7 @@
  */
 
 #include <cstring>
+#include <string_view>
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "core/file_sys/fsmitm_romfsbuild.h"
@@ -97,7 +98,8 @@ struct RomFSBuildFileContext {
     VirtualFile source;
 };
 
-static u32 romfs_calc_path_hash(u32 parent, std::string path, u32 start, std::size_t path_len) {
+static u32 romfs_calc_path_hash(u32 parent, std::string_view path, u32 start,
+                                std::size_t path_len) {
     u32 hash = parent ^ 123456789;
     for (u32 i = 0; i < path_len; i++) {
         hash = (hash >> 5) | (hash << 27);
diff --git a/src/core/file_sys/nca_metadata.cpp b/src/core/file_sys/nca_metadata.cpp
index 6f34b7836..93d0df6b9 100644
--- a/src/core/file_sys/nca_metadata.cpp
+++ b/src/core/file_sys/nca_metadata.cpp
@@ -10,14 +10,6 @@
 
 namespace FileSys {
 
-bool operator>=(TitleType lhs, TitleType rhs) {
-    return static_cast<std::size_t>(lhs) >= static_cast<std::size_t>(rhs);
-}
-
-bool operator<=(TitleType lhs, TitleType rhs) {
-    return static_cast<std::size_t>(lhs) <= static_cast<std::size_t>(rhs);
-}
-
 CNMT::CNMT(VirtualFile file) {
     if (file->ReadObject(&header) != sizeof(CNMTHeader))
         return;
diff --git a/src/core/file_sys/nca_metadata.h b/src/core/file_sys/nca_metadata.h
index a05d155f4..50bf38471 100644
--- a/src/core/file_sys/nca_metadata.h
+++ b/src/core/file_sys/nca_metadata.h
@@ -29,9 +29,6 @@ enum class TitleType : u8 {
     DeltaTitle = 0x83,
 };
 
-bool operator>=(TitleType lhs, TitleType rhs);
-bool operator<=(TitleType lhs, TitleType rhs);
-
 enum class ContentRecordType : u8 {
     Meta = 0,
     Program = 1,
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 61706966e..e11217708 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -7,6 +7,7 @@
 #include <cstddef>
 #include <cstring>
 
+#include "common/file_util.h"
 #include "common/hex_util.h"
 #include "common/logging/log.h"
 #include "core/file_sys/content_archive.h"
@@ -19,6 +20,7 @@
 #include "core/file_sys/vfs_vector.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/loader/loader.h"
+#include "core/loader/nso.h"
 #include "core/settings.h"
 
 namespace FileSys {
@@ -31,14 +33,6 @@ constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{
     "subsdk3", "subsdk4",   "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9",
 };
 
-struct NSOBuildHeader {
-    u32_le magic;
-    INSERT_PADDING_BYTES(0x3C);
-    std::array<u8, 0x20> build_id;
-    INSERT_PADDING_BYTES(0xA0);
-};
-static_assert(sizeof(NSOBuildHeader) == 0x100, "NSOBuildHeader has incorrect size.");
-
 std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
     std::array<u8, sizeof(u32)> bytes{};
     bytes[0] = version % SINGLE_BYTE_MODULUS;
@@ -162,14 +156,16 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
 }
 
 std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
-    if (nso.size() < 0x100)
+    if (nso.size() < sizeof(Loader::NSOHeader)) {
         return nso;
+    }
 
-    NSOBuildHeader header;
-    std::memcpy(&header, nso.data(), sizeof(NSOBuildHeader));
+    Loader::NSOHeader header;
+    std::memcpy(&header, nso.data(), sizeof(header));
 
-    if (header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+    if (header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
         return nso;
+    }
 
     const auto build_id_raw = Common::HexArrayToString(header.build_id);
     const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1);
@@ -212,9 +208,11 @@ std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
         }
     }
 
-    if (out.size() < 0x100)
+    if (out.size() < sizeof(Loader::NSOHeader)) {
         return nso;
-    std::memcpy(out.data(), &header, sizeof(NSOBuildHeader));
+    }
+
+    std::memcpy(out.data(), &header, sizeof(header));
     return out;
 }
 
@@ -232,6 +230,57 @@ bool PatchManager::HasNSOPatch(const std::array<u8, 32>& build_id_) const {
     return !CollectPatches(patch_dirs, build_id).empty();
 }
 
+static std::optional<CheatList> ReadCheatFileFromFolder(const Core::System& system, u64 title_id,
+                                                        const std::array<u8, 0x20>& build_id_,
+                                                        const VirtualDir& base_path, bool upper) {
+    const auto build_id_raw = Common::HexArrayToString(build_id_, upper);
+    const auto build_id = build_id_raw.substr(0, sizeof(u64) * 2);
+    const auto file = base_path->GetFile(fmt::format("{}.txt", build_id));
+
+    if (file == nullptr) {
+        LOG_INFO(Common_Filesystem, "No cheats file found for title_id={:016X}, build_id={}",
+                 title_id, build_id);
+        return std::nullopt;
+    }
+
+    std::vector<u8> data(file->GetSize());
+    if (file->Read(data.data(), data.size()) != data.size()) {
+        LOG_INFO(Common_Filesystem, "Failed to read cheats file for title_id={:016X}, build_id={}",
+                 title_id, build_id);
+        return std::nullopt;
+    }
+
+    TextCheatParser parser;
+    return parser.Parse(system, data);
+}
+
+std::vector<CheatList> PatchManager::CreateCheatList(const Core::System& system,
+                                                     const std::array<u8, 32>& build_id_) const {
+    const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
+    auto patch_dirs = load_dir->GetSubdirectories();
+    std::sort(patch_dirs.begin(), patch_dirs.end(),
+              [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
+
+    std::vector<CheatList> out;
+    out.reserve(patch_dirs.size());
+    for (const auto& subdir : patch_dirs) {
+        auto cheats_dir = subdir->GetSubdirectory("cheats");
+        if (cheats_dir != nullptr) {
+            auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
+            if (res.has_value()) {
+                out.push_back(std::move(*res));
+                continue;
+            }
+
+            res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, false);
+            if (res.has_value())
+                out.push_back(std::move(*res));
+        }
+    }
+
+    return out;
+}
+
 static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type) {
     const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
     if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
@@ -403,6 +452,8 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
             }
             if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
                 AppendCommaIfNotEmpty(types, "LayeredFS");
+            if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
+                AppendCommaIfNotEmpty(types, "Cheats");
 
             if (types.empty())
                 continue;
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index b8a1652fd..de2672c76 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -8,9 +8,14 @@
 #include <memory>
 #include <string>
 #include "common/common_types.h"
+#include "core/file_sys/cheat_engine.h"
 #include "core/file_sys/nca_metadata.h"
 #include "core/file_sys/vfs.h"
 
+namespace Core {
+class System;
+}
+
 namespace FileSys {
 
 class NCA;
@@ -45,6 +50,10 @@ public:
     // Used to prevent expensive copies in NSO loader.
     bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const;
 
+    // Creates one CheatList per mod directory that has a cheat file for the given build ID.
+    std::vector<CheatList> CreateCheatList(const Core::System& system,
+                                           const std::array<u8, 0x20>& build_id) const;
+
     // Currently tracked RomFS patches:
     // - Game Updates
     // - LayeredFS
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index d3e00437f..d863253f8 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -3,7 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <cstddef>
-#include <cstring>
 #include <vector>
 
 #include "common/logging/log.h"
@@ -17,28 +16,30 @@ ProgramMetadata::ProgramMetadata() = default;
 ProgramMetadata::~ProgramMetadata() = default;
 
 Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
-    std::size_t total_size = static_cast<std::size_t>(file->GetSize());
-    if (total_size < sizeof(Header))
+    const std::size_t total_size = file->GetSize();
+    if (total_size < sizeof(Header)) {
         return Loader::ResultStatus::ErrorBadNPDMHeader;
+    }
 
-    // TODO(DarkLordZach): Use ReadObject when Header/AcidHeader becomes trivially copyable.
-    std::vector<u8> npdm_header_data = file->ReadBytes(sizeof(Header));
-    if (sizeof(Header) != npdm_header_data.size())
+    if (sizeof(Header) != file->ReadObject(&npdm_header)) {
         return Loader::ResultStatus::ErrorBadNPDMHeader;
-    std::memcpy(&npdm_header, npdm_header_data.data(), sizeof(Header));
+    }
 
-    std::vector<u8> acid_header_data = file->ReadBytes(sizeof(AcidHeader), npdm_header.acid_offset);
-    if (sizeof(AcidHeader) != acid_header_data.size())
+    if (sizeof(AcidHeader) != file->ReadObject(&acid_header, npdm_header.acid_offset)) {
         return Loader::ResultStatus::ErrorBadACIDHeader;
-    std::memcpy(&acid_header, acid_header_data.data(), sizeof(AcidHeader));
+    }
 
-    if (sizeof(AciHeader) != file->ReadObject(&aci_header, npdm_header.aci_offset))
+    if (sizeof(AciHeader) != file->ReadObject(&aci_header, npdm_header.aci_offset)) {
         return Loader::ResultStatus::ErrorBadACIHeader;
+    }
 
-    if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset))
+    if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset)) {
         return Loader::ResultStatus::ErrorBadFileAccessControl;
-    if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset))
+    }
+
+    if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset)) {
         return Loader::ResultStatus::ErrorBadFileAccessHeader;
+    }
 
     aci_kernel_capabilities.resize(aci_header.kac_size / sizeof(u32));
     const u64 read_size = aci_header.kac_size;
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 0033ba347..7de5b9cf9 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -58,7 +58,6 @@ public:
     void Print() const;
 
 private:
-    // TODO(DarkLordZach): BitField is not trivially copyable.
     struct Header {
         std::array<char, 4> magic;
         std::array<u8, 8> reserved;
@@ -85,7 +84,6 @@ private:
 
     static_assert(sizeof(Header) == 0x80, "NPDM header structure size is wrong");
 
-    // TODO(DarkLordZach): BitField is not trivially copyable.
     struct AcidHeader {
         std::array<u8, 0x100> signature;
         std::array<u8, 0x100> nca_modulus;
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index 128199063..1c6bacace 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -94,7 +94,7 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
     case NCAContentType::Control:
         return ContentRecordType::Control;
     case NCAContentType::Data:
-    case NCAContentType::Data_Unknown5:
+    case NCAContentType::PublicData:
         return ContentRecordType::Data;
     case NCAContentType::Manual:
         // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.
diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp
index 1913dc956..7974b031d 100644
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -16,8 +16,10 @@ namespace FileSys {
 constexpr char SAVE_DATA_SIZE_FILENAME[] = ".yuzu_save_size";
 
 std::string SaveDataDescriptor::DebugInfo() const {
-    return fmt::format("[type={:02X}, title_id={:016X}, user_id={:016X}{:016X}, save_id={:016X}]",
-                       static_cast<u8>(type), title_id, user_id[1], user_id[0], save_id);
+    return fmt::format("[type={:02X}, title_id={:016X}, user_id={:016X}{:016X}, save_id={:016X}, "
+                       "rank={}, index={}]",
+                       static_cast<u8>(type), title_id, user_id[1], user_id[0], save_id,
+                       static_cast<u8>(rank), index);
 }
 
 SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save_directory)) {
@@ -28,7 +30,7 @@ SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save
 
 SaveDataFactory::~SaveDataFactory() = default;
 
-ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, SaveDataDescriptor meta) {
+ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, const SaveDataDescriptor& meta) {
     if (meta.type == SaveDataType::SystemSaveData || meta.type == SaveDataType::SaveData) {
         if (meta.zero_1 != 0) {
             LOG_WARNING(Service_FS,
diff --git a/src/core/file_sys/savedata_factory.h b/src/core/file_sys/savedata_factory.h
index 3a1caf292..b73654571 100644
--- a/src/core/file_sys/savedata_factory.h
+++ b/src/core/file_sys/savedata_factory.h
@@ -32,12 +32,19 @@ enum class SaveDataType : u8 {
     CacheStorage = 5,
 };
 
+enum class SaveDataRank : u8 {
+    Primary,
+    Secondary,
+};
+
 struct SaveDataDescriptor {
     u64_le title_id;
     u128 user_id;
     u64_le save_id;
     SaveDataType type;
-    INSERT_PADDING_BYTES(7);
+    SaveDataRank rank;
+    u16_le index;
+    INSERT_PADDING_BYTES(4);
     u64_le zero_1;
     u64_le zero_2;
     u64_le zero_3;
@@ -57,7 +64,7 @@ public:
     explicit SaveDataFactory(VirtualDir dir);
     ~SaveDataFactory();
 
-    ResultVal<VirtualDir> Open(SaveDataSpaceId space, SaveDataDescriptor meta);
+    ResultVal<VirtualDir> Open(SaveDataSpaceId space, const SaveDataDescriptor& meta);
 
     VirtualDir GetSaveDataSpaceDirectory(SaveDataSpaceId space) const;
 
diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp
index e3e79f40a..c9722ed77 100644
--- a/src/core/file_sys/system_archive/system_archive.cpp
+++ b/src/core/file_sys/system_archive/system_archive.cpp
@@ -6,6 +6,7 @@
 #include "core/file_sys/romfs.h"
 #include "core/file_sys/system_archive/ng_word.h"
 #include "core/file_sys/system_archive/system_archive.h"
+#include "core/file_sys/system_archive/system_version.h"
 
 namespace FileSys::SystemArchive {
 
@@ -30,7 +31,7 @@ constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHI
     {0x0100000000000806, "NgWord", &NgWord1},
     {0x0100000000000807, "SsidList", nullptr},
     {0x0100000000000808, "Dictionary", nullptr},
-    {0x0100000000000809, "SystemVersion", nullptr},
+    {0x0100000000000809, "SystemVersion", &SystemVersion},
     {0x010000000000080A, "AvatarImage", nullptr},
     {0x010000000000080B, "LocalNews", nullptr},
     {0x010000000000080C, "Eula", nullptr},
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
new file mode 100644
index 000000000..6e22f97b0
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -0,0 +1,52 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/file_sys/system_archive/system_version.h"
+#include "core/file_sys/vfs_vector.h"
+
+namespace FileSys::SystemArchive {
+
+namespace SystemVersionData {
+
+// This section should reflect the best system version to describe yuzu's HLE api.
+// TODO(DarkLordZach): Update when HLE gets better.
+
+constexpr u8 VERSION_MAJOR = 5;
+constexpr u8 VERSION_MINOR = 1;
+constexpr u8 VERSION_MICRO = 0;
+
+constexpr u8 REVISION_MAJOR = 3;
+constexpr u8 REVISION_MINOR = 0;
+
+constexpr char PLATFORM_STRING[] = "NX";
+constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
+constexpr char DISPLAY_VERSION[] = "5.1.0";
+constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";
+
+} // namespace SystemVersionData
+
+std::string GetLongDisplayVersion() {
+    return SystemVersionData::DISPLAY_TITLE;
+}
+
+VirtualDir SystemVersion() {
+    VirtualFile file = std::make_shared<VectorVfsFile>(std::vector<u8>(0x100), "file");
+    file->WriteObject(SystemVersionData::VERSION_MAJOR, 0);
+    file->WriteObject(SystemVersionData::VERSION_MINOR, 1);
+    file->WriteObject(SystemVersionData::VERSION_MICRO, 2);
+    file->WriteObject(SystemVersionData::REVISION_MAJOR, 4);
+    file->WriteObject(SystemVersionData::REVISION_MINOR, 5);
+    file->WriteArray(SystemVersionData::PLATFORM_STRING,
+                     std::min<u64>(sizeof(SystemVersionData::PLATFORM_STRING), 0x20ULL), 0x8);
+    file->WriteArray(SystemVersionData::VERSION_HASH,
+                     std::min<u64>(sizeof(SystemVersionData::VERSION_HASH), 0x40ULL), 0x28);
+    file->WriteArray(SystemVersionData::DISPLAY_VERSION,
+                     std::min<u64>(sizeof(SystemVersionData::DISPLAY_VERSION), 0x18ULL), 0x68);
+    file->WriteArray(SystemVersionData::DISPLAY_TITLE,
+                     std::min<u64>(sizeof(SystemVersionData::DISPLAY_TITLE), 0x80ULL), 0x80);
+    return std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{file},
+                                                std::vector<VirtualDir>{}, "data");
+}
+
+} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/system_version.h b/src/core/file_sys/system_archive/system_version.h
new file mode 100644
index 000000000..deed79b26
--- /dev/null
+++ b/src/core/file_sys/system_archive/system_version.h
@@ -0,0 +1,16 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include "core/file_sys/vfs_types.h"
+
+namespace FileSys::SystemArchive {
+
+std::string GetLongDisplayVersion();
+
+VirtualDir SystemVersion();
+
+} // namespace FileSys::SystemArchive
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index e29afd630..1320bbe77 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -30,7 +30,7 @@ private:
         explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {}
         std::tuple<float, float, bool> GetStatus() const override {
             if (auto state = touch_state.lock()) {
-                std::lock_guard<std::mutex> guard(state->mutex);
+                std::lock_guard guard{state->mutex};
                 return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed);
             }
             return std::make_tuple(0.0f, 0.0f, false);
@@ -81,7 +81,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
     if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
         return;
 
-    std::lock_guard<std::mutex> guard(touch_state->mutex);
+    std::lock_guard guard{touch_state->mutex};
     touch_state->touch_x = static_cast<float>(framebuffer_x - framebuffer_layout.screen.left) /
                            (framebuffer_layout.screen.right - framebuffer_layout.screen.left);
     touch_state->touch_y = static_cast<float>(framebuffer_y - framebuffer_layout.screen.top) /
@@ -91,7 +91,7 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
 }
 
 void EmuWindow::TouchReleased() {
-    std::lock_guard<std::mutex> guard(touch_state->mutex);
+    std::lock_guard guard{touch_state->mutex};
     touch_state->touch_pressed = false;
     touch_state->touch_x = 0;
     touch_state->touch_y = 0;
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index dafb32aae..afa812598 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -1030,7 +1030,7 @@ static void Step() {
 
 /// Tell the CPU if we hit a memory breakpoint.
 bool IsMemoryBreak() {
-    if (IsConnected()) {
+    if (!IsConnected()) {
         return false;
     }
 
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index 455d1f346..fae54bcc7 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -39,10 +39,10 @@ struct CommandHeader {
     union {
         u32_le raw_low;
         BitField<0, 16, CommandType> type;
-        BitField<16, 4, u32_le> num_buf_x_descriptors;
-        BitField<20, 4, u32_le> num_buf_a_descriptors;
-        BitField<24, 4, u32_le> num_buf_b_descriptors;
-        BitField<28, 4, u32_le> num_buf_w_descriptors;
+        BitField<16, 4, u32> num_buf_x_descriptors;
+        BitField<20, 4, u32> num_buf_a_descriptors;
+        BitField<24, 4, u32> num_buf_b_descriptors;
+        BitField<28, 4, u32> num_buf_w_descriptors;
     };
 
     enum class BufferDescriptorCFlag : u32 {
@@ -53,28 +53,28 @@ struct CommandHeader {
 
     union {
         u32_le raw_high;
-        BitField<0, 10, u32_le> data_size;
+        BitField<0, 10, u32> data_size;
         BitField<10, 4, BufferDescriptorCFlag> buf_c_descriptor_flags;
-        BitField<31, 1, u32_le> enable_handle_descriptor;
+        BitField<31, 1, u32> enable_handle_descriptor;
     };
 };
 static_assert(sizeof(CommandHeader) == 8, "CommandHeader size is incorrect");
 
 union HandleDescriptorHeader {
     u32_le raw_high;
-    BitField<0, 1, u32_le> send_current_pid;
-    BitField<1, 4, u32_le> num_handles_to_copy;
-    BitField<5, 4, u32_le> num_handles_to_move;
+    BitField<0, 1, u32> send_current_pid;
+    BitField<1, 4, u32> num_handles_to_copy;
+    BitField<5, 4, u32> num_handles_to_move;
 };
 static_assert(sizeof(HandleDescriptorHeader) == 4, "HandleDescriptorHeader size is incorrect");
 
 struct BufferDescriptorX {
     union {
-        BitField<0, 6, u32_le> counter_bits_0_5;
-        BitField<6, 3, u32_le> address_bits_36_38;
-        BitField<9, 3, u32_le> counter_bits_9_11;
-        BitField<12, 4, u32_le> address_bits_32_35;
-        BitField<16, 16, u32_le> size;
+        BitField<0, 6, u32> counter_bits_0_5;
+        BitField<6, 3, u32> address_bits_36_38;
+        BitField<9, 3, u32> counter_bits_9_11;
+        BitField<12, 4, u32> address_bits_32_35;
+        BitField<16, 16, u32> size;
     };
 
     u32_le address_bits_0_31;
@@ -103,10 +103,10 @@ struct BufferDescriptorABW {
     u32_le address_bits_0_31;
 
     union {
-        BitField<0, 2, u32_le> flags;
-        BitField<2, 3, u32_le> address_bits_36_38;
-        BitField<24, 4, u32_le> size_bits_32_35;
-        BitField<28, 4, u32_le> address_bits_32_35;
+        BitField<0, 2, u32> flags;
+        BitField<2, 3, u32> address_bits_36_38;
+        BitField<24, 4, u32> size_bits_32_35;
+        BitField<28, 4, u32> address_bits_32_35;
     };
 
     VAddr Address() const {
@@ -128,8 +128,8 @@ struct BufferDescriptorC {
     u32_le address_bits_0_31;
 
     union {
-        BitField<0, 16, u32_le> address_bits_32_47;
-        BitField<16, 16, u32_le> size;
+        BitField<0, 16, u32> address_bits_32_47;
+        BitField<16, 16, u32> size;
     };
 
     VAddr Address() const {
@@ -167,8 +167,8 @@ struct DomainMessageHeader {
         struct {
             union {
                 BitField<0, 8, CommandType> command;
-                BitField<8, 8, u32_le> input_object_count;
-                BitField<16, 16, u32_le> size;
+                BitField<8, 8, u32> input_object_count;
+                BitField<16, 16, u32> size;
             };
             u32_le object_id;
             INSERT_PADDING_WORDS(2);
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index a1e4be070..ac0e1d796 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -139,10 +139,8 @@ public:
             context->AddDomainObject(std::move(iface));
         } else {
             auto& kernel = Core::System::GetInstance().Kernel();
-            auto sessions =
+            auto [server, client] =
                 Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName());
-            auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions);
-            auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions);
             iface->ClientConnected(server);
             context->AddMoveObject(std::move(client));
         }
@@ -275,6 +273,20 @@ inline void ResponseBuilder::Push(u64 value) {
 }
 
 template <>
+inline void ResponseBuilder::Push(float value) {
+    u32 integral;
+    std::memcpy(&integral, &value, sizeof(u32));
+    Push(integral);
+}
+
+template <>
+inline void ResponseBuilder::Push(double value) {
+    u64 integral;
+    std::memcpy(&integral, &value, sizeof(u64));
+    Push(integral);
+}
+
+template <>
 inline void ResponseBuilder::Push(bool value) {
     Push(static_cast<u8>(value));
 }
@@ -416,6 +428,22 @@ inline s64 RequestParser::Pop() {
 }
 
 template <>
+inline float RequestParser::Pop() {
+    const u32 value = Pop<u32>();
+    float real;
+    std::memcpy(&real, &value, sizeof(real));
+    return real;
+}
+
+template <>
+inline double RequestParser::Pop() {
+    const u64 value = Pop<u64>();
+    double real; // Must be 64 bits wide so the whole popped payload is reinterpreted.
+    std::memcpy(&real, &value, sizeof(real));
+    return real;
+}
+
+template <>
 inline bool RequestParser::Pop() {
     return Pop<u8>() != 0;
 }
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 352190da8..c8842410b 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -26,7 +26,7 @@ void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_
     // them all.
     std::size_t last = waiting_threads.size();
     if (num_to_wake > 0) {
-        last = num_to_wake;
+        last = std::min(last, static_cast<std::size_t>(num_to_wake));
     }
 
     // Signal the waiting threads.
@@ -90,9 +90,9 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
     // Determine the modified value depending on the waiting count.
     s32 updated_value;
     if (waiting_threads.empty()) {
-        updated_value = value - 1;
-    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
         updated_value = value + 1;
+    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
+        updated_value = value - 1;
     } else {
         updated_value = value;
     }
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index aa432658e..744b1697d 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <tuple>
-
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
 #include "core/hle/kernel/errors.h"
@@ -31,18 +29,18 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
     active_sessions++;
 
     // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
-    auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
+    auto [server, client] = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
 
     if (server_port->HasHLEHandler()) {
-        server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
+        server_port->GetHLEHandler()->ClientConnected(server);
     } else {
-        server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions));
+        server_port->AppendPendingSession(server);
     }
 
     // Wake the threads waiting on the ServerPort
     server_port->WakeupAllWaitingThreads();
 
-    return MakeResult(std::get<SharedPtr<ClientSession>>(sessions));
+    return MakeResult(client);
 }
 
 void ClientPort::ConnectionClosed() {
diff --git a/src/core/hle/kernel/code_set.cpp b/src/core/hle/kernel/code_set.cpp
new file mode 100644
index 000000000..1f434e9af
--- /dev/null
+++ b/src/core/hle/kernel/code_set.cpp
@@ -0,0 +1,12 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/kernel/code_set.h"
+
+namespace Kernel {
+
+CodeSet::CodeSet() = default;
+CodeSet::~CodeSet() = default;
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
new file mode 100644
index 000000000..879957dcb
--- /dev/null
+++ b/src/core/hle/kernel/code_set.h
@@ -0,0 +1,89 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Kernel {
+
+/**
+ * Represents executable data that may be loaded into a kernel process.
+ *
+ * A code set consists of three basic segments:
+ *   - A code (AKA text) segment,
+ *   - A read-only data segment (rodata)
+ *   - A data segment
+ *
+ * The code segment is the portion of the object file that contains
+ * executable instructions.
+ *
+ * The read-only data segment is the portion of the object file that
+ * contains (as one would expect) read-only data, such as fixed constant
+ * values and data structures.
+ *
+ * The data segment is similar to the read-only data segment -- it contains
+ * variables and data structures that have predefined values, however,
+ * entities within this segment can be modified.
+ */
+struct CodeSet final {
+    /// A single segment within a code set.
+    struct Segment final {
+        /// The byte offset that this segment is located at.
+        std::size_t offset = 0;
+
+        /// The address to map this segment to.
+        VAddr addr = 0;
+
+        /// The size of this segment in bytes.
+        u32 size = 0;
+    };
+
+    explicit CodeSet();
+    ~CodeSet();
+
+    CodeSet(const CodeSet&) = delete;
+    CodeSet& operator=(const CodeSet&) = delete;
+
+    CodeSet(CodeSet&&) = default;
+    CodeSet& operator=(CodeSet&&) = default;
+
+    Segment& CodeSegment() {
+        return segments[0];
+    }
+
+    const Segment& CodeSegment() const {
+        return segments[0];
+    }
+
+    Segment& RODataSegment() {
+        return segments[1];
+    }
+
+    const Segment& RODataSegment() const {
+        return segments[1];
+    }
+
+    Segment& DataSegment() {
+        return segments[2];
+    }
+
+    const Segment& DataSegment() const {
+        return segments[2];
+    }
+
+    /// The overall data that backs this code set.
+    std::vector<u8> memory;
+
+    /// The segments that comprise this code set.
+    std::array<Segment, 3> segments;
+
+    /// The entry point address for this code set.
+    VAddr entrypoint = 0;
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4d224d01d..4d58e7c69 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -21,6 +21,7 @@
 #include "core/hle/kernel/thread.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
+#include "core/memory.h"
 
 namespace Kernel {
 
@@ -29,12 +30,12 @@ namespace Kernel {
  * @param thread_handle The handle of the thread that's been awoken
  * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
  */
-static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) {
+static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
     const auto proper_handle = static_cast<Handle>(thread_handle);
     const auto& system = Core::System::GetInstance();
 
     // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
 
     SharedPtr<Thread> thread =
         system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle);
@@ -62,7 +63,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
 
     if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 ||
         thread->GetWaitHandle() != 0) {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+        ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex ||
+               thread->GetStatus() == ThreadStatus::WaitCondVar);
         thread->SetMutexWaitAddress(0);
         thread->SetCondVarWaitAddress(0);
         thread->SetWaitHandle(0);
@@ -114,7 +116,7 @@ struct KernelCore::Impl {
 
     // Creates the default system resource limit
     void InitializeSystemResourceLimit(KernelCore& kernel) {
-        system_resource_limit = ResourceLimit::Create(kernel, "System");
+        system_resource_limit = ResourceLimit::Create(kernel);
 
         // If setting the default system values fails, then something seriously wrong has occurred.
         ASSERT(system_resource_limit->SetLimitValue(ResourceType::PhysicalMemory, 0x200000000)
@@ -180,6 +182,7 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
 
 void KernelCore::MakeCurrentProcess(Process* process) {
     impl->current_process = process;
+    Memory::SetCurrentPageTable(&process->VMManager().page_table);
 }
 
 Process* KernelCore::CurrentProcess() {
@@ -190,6 +193,10 @@ const Process* KernelCore::CurrentProcess() const {
     return impl->current_process;
 }
 
+const std::vector<SharedPtr<Process>>& KernelCore::GetProcessList() const {
+    return impl->process_list;
+}
+
 void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
     impl->named_ports.emplace(std::move(name), std::move(port));
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index ff17ff865..6b8738599 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -8,9 +8,6 @@
 #include <unordered_map>
 #include "core/hle/kernel/object.h"
 
-template <typename T>
-class ResultVal;
-
 namespace Core {
 class System;
 }
@@ -75,6 +72,9 @@ public:
     /// Retrieves a const pointer to the current process.
     const Process* CurrentProcess() const;
 
+    /// Retrieves the list of processes.
+    const std::vector<SharedPtr<Process>>& GetProcessList() const;
+
     /// Adds a port to the named port table
     void AddNamedPort(std::string name, SharedPtr<ClientPort> port);
 
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 0743670ad..98e87313b 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <map>
 #include <utility>
 #include <vector>
 
@@ -10,8 +9,11 @@
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
@@ -57,41 +59,47 @@ static void TransferMutexOwnership(VAddr mutex_addr, SharedPtr<Thread> current_t
     }
 }
 
-ResultCode Mutex::TryAcquire(HandleTable& handle_table, VAddr address, Handle holding_thread_handle,
+Mutex::Mutex(Core::System& system) : system{system} {}
+Mutex::~Mutex() = default;
+
+ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
                              Handle requesting_thread_handle) {
     // The mutex address must be 4-byte aligned
     if ((address % sizeof(u32)) != 0) {
         return ERR_INVALID_ADDRESS;
     }
 
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    Thread* const current_thread = system.CurrentScheduler().GetCurrentThread();
     SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
     SharedPtr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
 
     // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
     // thread.
-    ASSERT(requesting_thread == GetCurrentThread());
+    ASSERT(requesting_thread == current_thread);
 
-    u32 addr_value = Memory::Read32(address);
+    const u32 addr_value = Memory::Read32(address);
 
     // If the mutex isn't being held, just return success.
     if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
         return RESULT_SUCCESS;
     }
 
-    if (holding_thread == nullptr)
+    if (holding_thread == nullptr) {
         return ERR_INVALID_HANDLE;
+    }
 
     // Wait until the mutex is released
-    GetCurrentThread()->SetMutexWaitAddress(address);
-    GetCurrentThread()->SetWaitHandle(requesting_thread_handle);
+    current_thread->SetMutexWaitAddress(address);
+    current_thread->SetWaitHandle(requesting_thread_handle);
 
-    GetCurrentThread()->SetStatus(ThreadStatus::WaitMutex);
-    GetCurrentThread()->InvalidateWakeupCallback();
+    current_thread->SetStatus(ThreadStatus::WaitMutex);
+    current_thread->InvalidateWakeupCallback();
 
     // Update the lock holder thread's priority to prevent priority inversion.
-    holding_thread->AddMutexWaiter(GetCurrentThread());
+    holding_thread->AddMutexWaiter(current_thread);
 
-    Core::System::GetInstance().PrepareReschedule();
+    system.PrepareReschedule();
 
     return RESULT_SUCCESS;
 }
@@ -102,7 +110,8 @@ ResultCode Mutex::Release(VAddr address) {
         return ERR_INVALID_ADDRESS;
     }
 
-    auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address);
+    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+    auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
 
     // There are no more threads waiting for the mutex, release it completely.
     if (thread == nullptr) {
@@ -111,7 +120,7 @@ ResultCode Mutex::Release(VAddr address) {
     }
 
     // Transfer the ownership of the mutex from the previous owner to the new one.
-    TransferMutexOwnership(address, GetCurrentThread(), thread);
+    TransferMutexOwnership(address, current_thread, thread);
 
     u32 mutex_value = thread->GetWaitHandle();
 
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index 81e62d497..b904de2e8 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -5,32 +5,34 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "core/hle/kernel/object.h"
 
 union ResultCode;
 
-namespace Kernel {
+namespace Core {
+class System;
+}
 
-class HandleTable;
-class Thread;
+namespace Kernel {
 
 class Mutex final {
 public:
+    explicit Mutex(Core::System& system);
+    ~Mutex();
+
     /// Flag that indicates that a mutex still has threads waiting for it.
     static constexpr u32 MutexHasWaitersFlag = 0x40000000;
     /// Mask of the bits in a mutex address value that contain the mutex owner.
     static constexpr u32 MutexOwnerMask = 0xBFFFFFFF;
 
     /// Attempts to acquire a mutex at the specified address.
-    static ResultCode TryAcquire(HandleTable& handle_table, VAddr address,
-                                 Handle holding_thread_handle, Handle requesting_thread_handle);
+    ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
+                          Handle requesting_thread_handle);
 
     /// Releases the mutex at the specified address.
-    static ResultCode Release(VAddr address);
+    ResultCode Release(VAddr address);
 
 private:
-    Mutex() = default;
-    ~Mutex() = default;
+    Core::System& system;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp
index 8870463d0..10431e94c 100644
--- a/src/core/hle/kernel/object.cpp
+++ b/src/core/hle/kernel/object.cpp
@@ -23,7 +23,7 @@ bool Object::IsWaitable() const {
     case HandleType::Unknown:
     case HandleType::WritableEvent:
     case HandleType::SharedMemory:
-    case HandleType::AddressArbiter:
+    case HandleType::TransferMemory:
     case HandleType::ResourceLimit:
     case HandleType::ClientPort:
     case HandleType::ClientSession:
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index 4c2505908..332876c27 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -22,9 +22,9 @@ enum class HandleType : u32 {
     WritableEvent,
     ReadableEvent,
     SharedMemory,
+    TransferMemory,
     Thread,
     Process,
-    AddressArbiter,
     ResourceLimit,
     ClientPort,
     ServerPort,
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 49fced7b1..4e94048da 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -5,10 +5,12 @@
 #include <algorithm>
 #include <memory>
 #include <random>
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
+#include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
@@ -30,9 +32,6 @@ namespace {
  * @param priority The priority to give the main thread
  */
 void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
-    // Setup page table so we can write to memory
-    SetCurrentPageTable(&owner_process.VMManager().page_table);
-
     // Initialize new "main" thread
     const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
     auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
@@ -50,9 +49,6 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
 }
 } // Anonymous namespace
 
-CodeSet::CodeSet() = default;
-CodeSet::~CodeSet() = default;
-
 SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
     auto& kernel = system.Kernel();
 
@@ -77,6 +73,18 @@ SharedPtr<ResourceLimit> Process::GetResourceLimit() const {
     return resource_limit;
 }
 
+u64 Process::GetTotalPhysicalMemoryUsed() const {
+    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
+}
+
+void Process::RegisterThread(const Thread* thread) {
+    thread_list.push_back(thread);
+}
+
+void Process::UnregisterThread(const Thread* thread) {
+    thread_list.remove(thread);
+}
+
 ResultCode Process::ClearSignalState() {
     if (status == ProcessStatus::Exited) {
         LOG_ERROR(Kernel, "called on a terminated process instance.");
@@ -98,6 +106,8 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     is_64bit_process = metadata.Is64BitProgram();
 
     vm_manager.Reset(metadata.GetAddressSpaceType());
+    // Ensure that the potentially resized page table is seen by CPU backends.
+    Memory::SetCurrentPageTable(&vm_manager.page_table);
 
     const auto& caps = metadata.GetKernelCapabilities();
     const auto capability_init_result =
@@ -109,14 +119,17 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     return handle_table.SetSize(capabilities.GetHandleTableSize());
 }
 
-void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
+void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
+    // The kernel always ensures that the given stack size is page aligned.
+    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
+
     // Allocate and map the main thread stack
     // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
     // of the user address space.
+    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
     vm_manager
-        .MapMemoryBlock(vm_manager.GetTLSIORegionEndAddress() - stack_size,
-                        std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size,
-                        MemoryState::Stack)
+        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+                        0, main_thread_stack_size, MemoryState::Stack)
         .Unwrap();
 
     vm_manager.LogLayout();
@@ -212,33 +225,38 @@ void Process::FreeTLSSlot(VAddr tls_address) {
 }
 
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
-    const auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions,
+    // Record the code size first: module_.memory is moved from (and left empty) just below.
+    code_memory_size += module_.memory.size();
+    const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory));
+
+    const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
                                 MemoryState memory_state) {
         const auto vma = vm_manager
-                             .MapMemoryBlock(segment.addr + base_addr, module_.memory,
-                                             segment.offset, segment.size, memory_state)
+                             .MapMemoryBlock(segment.addr + base_addr, memory, segment.offset,
+                                             segment.size, memory_state)
                              .Unwrap();
         vm_manager.Reprotect(vma, permissions);
     };
 
     // Map CodeSet segments
-    MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::CodeStatic);
-    MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeMutable);
-    MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable);
+    MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
+    MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
+    MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
 
     // Clear instruction cache in CPU JIT
     system.InvalidateCpuInstructionCaches();
 }
 
 Process::Process(Core::System& system)
-    : WaitObject{system.Kernel()}, address_arbiter{system}, system{system} {}
+    : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
+
 Process::~Process() = default;
 
 void Process::Acquire(Thread* thread) {
     ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
 }
 
-bool Process::ShouldWait(Thread* thread) const {
+bool Process::ShouldWait(const Thread* thread) const {
     return !is_signaled;
 }
 
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 47ffd4ad3..f060f2a3b 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -7,13 +7,14 @@
 #include <array>
 #include <bitset>
 #include <cstddef>
-#include <memory>
+#include <list>
 #include <string>
 #include <vector>
 #include <boost/container/static_vector.hpp>
 #include "common/common_types.h"
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/process_capability.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/hle/kernel/wait_object.h"
@@ -33,13 +34,7 @@ class KernelCore;
 class ResourceLimit;
 class Thread;
 
-struct AddressMapping {
-    // Address and size must be page-aligned
-    VAddr address;
-    u64 size;
-    bool read_only;
-    bool unk_flag;
-};
+struct CodeSet;
 
 enum class MemoryRegion : u16 {
     APPLICATION = 1,
@@ -65,46 +60,6 @@ enum class ProcessStatus {
     DebugBreak,
 };
 
-struct CodeSet final {
-    struct Segment {
-        std::size_t offset = 0;
-        VAddr addr = 0;
-        u32 size = 0;
-    };
-
-    explicit CodeSet();
-    ~CodeSet();
-
-    Segment& CodeSegment() {
-        return segments[0];
-    }
-
-    const Segment& CodeSegment() const {
-        return segments[0];
-    }
-
-    Segment& RODataSegment() {
-        return segments[1];
-    }
-
-    const Segment& RODataSegment() const {
-        return segments[1];
-    }
-
-    Segment& DataSegment() {
-        return segments[2];
-    }
-
-    const Segment& DataSegment() const {
-        return segments[2];
-    }
-
-    std::shared_ptr<std::vector<u8>> memory;
-
-    std::array<Segment, 3> segments;
-    VAddr entrypoint = 0;
-};
-
 class Process final : public WaitObject {
 public:
     enum : u64 {
@@ -165,6 +120,16 @@ public:
         return address_arbiter;
     }
 
+    /// Gets a reference to the process' mutex lock.
+    Mutex& GetMutex() {
+        return mutex;
+    }
+
+    /// Gets a const reference to the process' mutex lock
+    const Mutex& GetMutex() const {
+        return mutex;
+    }
+
     /// Gets the current status of the process
     ProcessStatus GetStatus() const {
         return status;
@@ -222,6 +187,22 @@ public:
         return random_entropy.at(index);
     }
 
+    /// Retrieves the total physical memory used by this process in bytes.
+    u64 GetTotalPhysicalMemoryUsed() const;
+
+    /// Gets the list of all threads created with this process as their owner.
+    const std::list<const Thread*>& GetThreadList() const {
+        return thread_list;
+    }
+
+    /// Registers a thread as being created under this process,
+    /// adding it to this process' thread list.
+    void RegisterThread(const Thread* thread);
+
+    /// Unregisters a thread from this process, removing it
+    /// from this process' thread list.
+    void UnregisterThread(const Thread* thread);
+
     /// Clears the signaled state of the process if and only if it's signaled.
     ///
     /// @pre The process must not be already terminated. If this is called on a
@@ -246,7 +227,7 @@ public:
     /**
      * Applies address space changes and launches the process main thread.
      */
-    void Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size);
+    void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size);
 
     /**
      * Prepares a process for termination by stopping all of its threads
@@ -270,7 +251,7 @@ private:
     ~Process() override;
 
     /// Checks if the specified thread should wait until this process is available.
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
 
     /// Acquires/locks this process for the specified thread if it's available.
     void Acquire(Thread* thread) override;
@@ -283,6 +264,12 @@ private:
     /// Memory manager for this process.
     Kernel::VMManager vm_manager;
 
+    /// Size of the main thread's stack in bytes.
+    u64 main_thread_stack_size = 0;
+
+    /// Size of the loaded code memory in bytes.
+    u64 code_memory_size = 0;
+
     /// Current status of the process
     ProcessStatus status;
 
@@ -327,9 +314,17 @@ private:
     /// Per-process address arbiter.
     AddressArbiter address_arbiter;
 
+    /// The per-process mutex lock instance used for handling various
+    /// forms of services, such as lock arbitration, and condition
+    /// variable related facilities.
+    Mutex mutex;
+
     /// Random values for svcGetInfo RandomEntropy
     std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
 
+    /// List of threads that are running with this process as their owner.
+    std::list<const Thread*> thread_list;
+
     /// System context
     Core::System& system;
 
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 0e5083f70..c2b798a4e 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -14,7 +14,7 @@ namespace Kernel {
 ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {}
 ReadableEvent::~ReadableEvent() = default;
 
-bool ReadableEvent::ShouldWait(Thread* thread) const {
+bool ReadableEvent::ShouldWait(const Thread* thread) const {
     return !signaled;
 }
 
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 77a9c362c..2eb9dcbb7 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -36,7 +36,7 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
     void Acquire(Thread* thread) override;
 
     /// Unconditionally clears the readable event's state.
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index 2f9695005..173f69915 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -16,11 +16,8 @@ constexpr std::size_t ResourceTypeToIndex(ResourceType type) {
 ResourceLimit::ResourceLimit(KernelCore& kernel) : Object{kernel} {}
 ResourceLimit::~ResourceLimit() = default;
 
-SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel, std::string name) {
-    SharedPtr<ResourceLimit> resource_limit(new ResourceLimit(kernel));
-
-    resource_limit->name = std::move(name);
-    return resource_limit;
+SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel) {
+    return new ResourceLimit(kernel);
 }
 
 s64 ResourceLimit::GetCurrentResourceValue(ResourceType resource) const {
diff --git a/src/core/hle/kernel/resource_limit.h b/src/core/hle/kernel/resource_limit.h
index 59dc11c22..70e09858a 100644
--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -31,16 +31,14 @@ constexpr bool IsValidResourceType(ResourceType type) {
 
 class ResourceLimit final : public Object {
 public:
-    /**
-     * Creates a resource limit object.
-     */
-    static SharedPtr<ResourceLimit> Create(KernelCore& kernel, std::string name = "Unknown");
+    /// Creates a resource limit object.
+    static SharedPtr<ResourceLimit> Create(KernelCore& kernel);
 
     std::string GetTypeName() const override {
         return "ResourceLimit";
     }
     std::string GetName() const override {
-        return name;
+        return GetTypeName();
     }
 
     static const HandleType HANDLE_TYPE = HandleType::ResourceLimit;
@@ -95,9 +93,6 @@ private:
     ResourceArray limits{};
     /// Current resource limit values.
     ResourceArray values{};
-
-    /// Name of resource limit object.
-    std::string name;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 5fccfd9f4..e8447b69a 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -29,8 +29,8 @@ Scheduler::~Scheduler() {
 }
 
 bool Scheduler::HaveReadyThreads() const {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
-    return ready_queue.get_first() != nullptr;
+    std::lock_guard lock{scheduler_mutex};
+    return !ready_queue.empty();
 }
 
 Thread* Scheduler::GetCurrentThread() const {
@@ -46,22 +46,27 @@ Thread* Scheduler::PopNextReadyThread() {
     Thread* thread = GetCurrentThread();
 
     if (thread && thread->GetStatus() == ThreadStatus::Running) {
+        if (ready_queue.empty()) {
+            return thread;
+        }
         // We have to do better than the current thread.
         // This call returns null when that's not possible.
-        next = ready_queue.pop_first_better(thread->GetPriority());
-        if (!next) {
-            // Otherwise just keep going with the current thread
+        next = ready_queue.front();
+        if (next == nullptr || next->GetPriority() >= thread->GetPriority()) {
             next = thread;
         }
     } else {
-        next = ready_queue.pop_first();
+        if (ready_queue.empty()) {
+            return nullptr;
+        }
+        next = ready_queue.front();
     }
 
     return next;
 }
 
 void Scheduler::SwitchContext(Thread* new_thread) {
-    Thread* const previous_thread = GetCurrentThread();
+    Thread* previous_thread = GetCurrentThread();
     Process* const previous_process = system.Kernel().CurrentProcess();
 
     UpdateLastContextSwitchTime(previous_thread, previous_process);
@@ -75,7 +80,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
         if (previous_thread->GetStatus() == ThreadStatus::Running) {
             // This is only the case when a reschedule is triggered without the current thread
             // yielding execution (i.e. an event triggered, system core time-sliced, etc)
-            ready_queue.push_front(previous_thread->GetPriority(), previous_thread);
+            ready_queue.add(previous_thread, previous_thread->GetPriority(), false);
             previous_thread->SetStatus(ThreadStatus::Ready);
         }
     }
@@ -90,13 +95,12 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
         current_thread = new_thread;
 
-        ready_queue.remove(new_thread->GetPriority(), new_thread);
+        ready_queue.remove(new_thread, new_thread->GetPriority());
         new_thread->SetStatus(ThreadStatus::Running);
 
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
         if (previous_process != thread_owner_process) {
             system.Kernel().MakeCurrentProcess(thread_owner_process);
-            SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
         }
 
         cpu_core.LoadContext(new_thread->GetContext());
@@ -127,7 +131,7 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
 }
 
 void Scheduler::Reschedule() {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     Thread* cur = GetCurrentThread();
     Thread* next = PopNextReadyThread();
@@ -143,51 +147,54 @@ void Scheduler::Reschedule() {
     SwitchContext(next);
 }
 
-void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+void Scheduler::AddThread(SharedPtr<Thread> thread) {
+    std::lock_guard lock{scheduler_mutex};
 
     thread_list.push_back(std::move(thread));
-    ready_queue.prepare(priority);
 }
 
 void Scheduler::RemoveThread(Thread* thread) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
                       thread_list.end());
 }
 
 void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     ASSERT(thread->GetStatus() == ThreadStatus::Ready);
-    ready_queue.push_back(priority, thread);
+    ready_queue.add(thread, priority);
 }
 
 void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     ASSERT(thread->GetStatus() == ThreadStatus::Ready);
-    ready_queue.remove(priority, thread);
+    ready_queue.remove(thread, priority);
 }
 
 void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
+    if (thread->GetPriority() == priority) {
+        return;
+    }
 
     // If thread was ready, adjust queues
     if (thread->GetStatus() == ThreadStatus::Ready)
-        ready_queue.move(thread, thread->GetPriority(), priority);
-    else
-        ready_queue.prepare(priority);
+        ready_queue.adjust(thread, thread->GetPriority(), priority);
 }
 
 Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
-    std::lock_guard<std::mutex> lock(scheduler_mutex);
+    std::lock_guard lock{scheduler_mutex};
 
     const u32 mask = 1U << core;
-    return ready_queue.get_first_filter([mask, maximum_priority](Thread const* thread) {
-        return (thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority;
-    });
+    for (auto* thread : ready_queue) {
+        if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) {
+            return thread;
+        }
+    }
+    return nullptr;
 }
 
 void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
@@ -199,8 +206,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
     ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
 
     // Yield this thread -- sleep for zero time and force reschedule to different thread
-    WaitCurrentThread_Sleep();
-    GetCurrentThread()->WakeAfterDelay(0);
+    GetCurrentThread()->Sleep(0);
 }
 
 void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -215,8 +221,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
     ASSERT(priority < THREADPRIO_COUNT);
 
     // Sleep for zero time to be able to force reschedule to different thread
-    WaitCurrentThread_Sleep();
-    GetCurrentThread()->WakeAfterDelay(0);
+    GetCurrentThread()->Sleep(0);
 
     Thread* suggested_thread = nullptr;
 
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 1c5bf57d9..b29bf7be8 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -7,7 +7,7 @@
 #include <mutex>
 #include <vector>
 #include "common/common_types.h"
-#include "common/thread_queue_list.h"
+#include "common/multi_level_queue.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/thread.h"
 
@@ -38,7 +38,7 @@ public:
     u64 GetLastContextSwitchTicks() const;
 
     /// Adds a new thread to the scheduler
-    void AddThread(SharedPtr<Thread> thread, u32 priority);
+    void AddThread(SharedPtr<Thread> thread);
 
     /// Removes a thread from the scheduler
     void RemoveThread(Thread* thread);
@@ -156,7 +156,7 @@ private:
     std::vector<SharedPtr<Thread>> thread_list;
 
     /// Lists only ready thread ids.
-    Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
+    Common::MultiLevelQueue<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
 
     SharedPtr<Thread> current_thread = nullptr;
 
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index 0e1515c89..02e7c60e6 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -30,7 +30,7 @@ void ServerPort::AppendPendingSession(SharedPtr<ServerSession> pending_session)
     pending_sessions.push_back(std::move(pending_session));
 }
 
-bool ServerPort::ShouldWait(Thread* thread) const {
+bool ServerPort::ShouldWait(const Thread* thread) const {
     // If there are no pending sessions, we wait until a new one is added.
     return pending_sessions.empty();
 }
@@ -39,9 +39,8 @@ void ServerPort::Acquire(Thread* thread) {
     ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }
 
-std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair(
-    KernelCore& kernel, u32 max_sessions, std::string name) {
-
+ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
+                                                std::string name) {
     SharedPtr<ServerPort> server_port(new ServerPort(kernel));
     SharedPtr<ClientPort> client_port(new ClientPort(kernel));
 
@@ -51,7 +50,7 @@ std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortP
     client_port->max_sessions = max_sessions;
     client_port->active_sessions = 0;
 
-    return std::make_tuple(std::move(server_port), std::move(client_port));
+    return std::make_pair(std::move(server_port), std::move(client_port));
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h
index 9bc667cf2..fef573b71 100644
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -6,7 +6,7 @@
 
 #include <memory>
 #include <string>
-#include <tuple>
+#include <utility>
 #include <vector>
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
@@ -23,6 +23,7 @@ class SessionRequestHandler;
 class ServerPort final : public WaitObject {
 public:
     using HLEHandler = std::shared_ptr<SessionRequestHandler>;
+    using PortPair = std::pair<SharedPtr<ServerPort>, SharedPtr<ClientPort>>;
 
     /**
      * Creates a pair of ServerPort and an associated ClientPort.
@@ -32,8 +33,8 @@ public:
      * @param name Optional name of the ports
-     * @return The created port tuple
+     * @return The created port pair
      */
-    static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair(
-        KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort");
+    static PortPair CreatePortPair(KernelCore& kernel, u32 max_sessions,
+                                   std::string name = "UnknownPort");
 
     std::string GetTypeName() const override {
         return "ServerPort";
@@ -75,7 +76,7 @@ public:
     /// waiting to be accepted by this port.
     void AppendPendingSession(SharedPtr<ServerSession> pending_session);
 
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
     void Acquire(Thread* thread) override;
 
 private:
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 4d8a337a7..a6b2cf06a 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -46,7 +46,7 @@ ResultVal<SharedPtr<ServerSession>> ServerSession::Create(KernelCore& kernel, st
     return MakeResult(std::move(server_session));
 }
 
-bool ServerSession::ShouldWait(Thread* thread) const {
+bool ServerSession::ShouldWait(const Thread* thread) const {
     // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
     if (parent->client == nullptr)
         return false;
@@ -204,6 +204,6 @@ ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel,
     client_session->parent = parent;
     server_session->parent = parent;
 
-    return std::make_tuple(std::move(server_session), std::move(client_session));
+    return std::make_pair(std::move(server_session), std::move(client_session));
 }
 } // namespace Kernel
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index aea4ccfeb..09b835ff8 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -6,6 +6,7 @@
 
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "core/hle/kernel/object.h"
@@ -41,6 +42,10 @@ public:
         return "ServerSession";
     }
 
+    std::string GetName() const override {
+        return name;
+    }
+
     static const HandleType HANDLE_TYPE = HandleType::ServerSession;
     HandleType GetHandleType() const override {
         return HANDLE_TYPE;
@@ -54,7 +59,7 @@ public:
         return parent.get();
     }
 
-    using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
+    using SessionPair = std::pair<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
 
     /**
      * Creates a pair of ServerSession and an associated ClientSession.
@@ -82,7 +87,7 @@ public:
      */
     ResultCode HandleSyncRequest(SharedPtr<Thread> thread);
 
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
 
     void Acquire(Thread* thread) override;
 
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 62861da36..f15c5ee36 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -9,7 +9,6 @@
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/shared_memory.h"
-#include "core/memory.h"
 
 namespace Kernel {
 
@@ -119,7 +118,15 @@ ResultCode SharedMemory::Map(Process& target_process, VAddr address, MemoryPermi
                                                      ConvertPermissions(permissions));
 }
 
-ResultCode SharedMemory::Unmap(Process& target_process, VAddr address) {
+ResultCode SharedMemory::Unmap(Process& target_process, VAddr address, u64 unmap_size) {
+    if (unmap_size != size) {
+        LOG_ERROR(Kernel,
+                  "Invalid size passed to Unmap. Size must be equal to the size of the "
+                  "memory managed. Shared memory size=0x{:016X}, Unmap size=0x{:016X}",
+                  size, unmap_size);
+        return ERR_INVALID_SIZE;
+    }
+
     // TODO(Subv): Verify what happens if the application tries to unmap an address that is not
     // mapped to a SharedMemory.
     return target_process.VMManager().UnmapRange(address, size);
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index dab2a6bea..37e18c443 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -104,11 +104,17 @@ public:
 
     /**
      * Unmaps a shared memory block from the specified address in system memory
+     *
      * @param target_process Process from which to unmap the memory block.
-     * @param address Address in system memory where the shared memory block is mapped
+     * @param address        Address in system memory where the shared memory block is mapped.
+     * @param unmap_size     The number of bytes to unmap from this shared memory instance.
+     *
      * @return Result code of the unmap operation
+     *
+     * @pre The given size to unmap must be the same size as the amount of memory managed by
+     *      the SharedMemory instance itself, otherwise ERR_INVALID_SIZE will be returned.
      */
-    ResultCode Unmap(Process& target_process, VAddr address);
+    ResultCode Unmap(Process& target_process, VAddr address, u64 unmap_size);
 
     /**
      * Gets a pointer to the shared memory block
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 77d0e3d96..2fd07ab34 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -32,6 +32,7 @@
 #include "core/hle/kernel/svc.h"
 #include "core/hle/kernel/svc_wrap.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -174,11 +175,8 @@ static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
         return ERR_INVALID_SIZE;
     }
 
-    auto& vm_manager = Core::CurrentProcess()->VMManager();
-    const VAddr heap_base = vm_manager.GetHeapRegionBaseAddress();
-    const auto alloc_result =
-        vm_manager.HeapAllocate(heap_base, heap_size, VMAPermission::ReadWrite);
-
+    auto& vm_manager = Core::System::GetInstance().Kernel().CurrentProcess()->VMManager();
+    const auto alloc_result = vm_manager.SetHeapSize(heap_size);
     if (alloc_result.Failed()) {
         return alloc_result.Code();
     }
@@ -551,9 +549,9 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
         return ERR_INVALID_ADDRESS;
     }
 
-    auto& handle_table = Core::CurrentProcess()->GetHandleTable();
-    return Mutex::TryAcquire(handle_table, mutex_addr, holding_thread_handle,
-                             requesting_thread_handle);
+    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    return current_process->GetMutex().TryAcquire(mutex_addr, holding_thread_handle,
+                                                  requesting_thread_handle);
 }
 
 /// Unlock a mutex
@@ -571,7 +569,8 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
         return ERR_INVALID_ADDRESS;
     }
 
-    return Mutex::Release(mutex_addr);
+    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    return current_process->GetMutex().Release(mutex_addr);
 }
 
 enum class BreakType : u32 {
@@ -710,7 +709,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         HeapRegionBaseAddr = 4,
         HeapRegionSize = 5,
         TotalMemoryUsage = 6,
-        TotalHeapUsage = 7,
+        TotalPhysicalMemoryUsed = 7,
         IsCurrentProcessBeingDebugged = 8,
         RegisterResourceLimit = 9,
         IdleTickCount = 10,
@@ -746,7 +745,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
     case GetInfoType::NewMapRegionBaseAddr:
     case GetInfoType::NewMapRegionSize:
     case GetInfoType::TotalMemoryUsage:
-    case GetInfoType::TotalHeapUsage:
+    case GetInfoType::TotalPhysicalMemoryUsed:
     case GetInfoType::IsVirtualAddressMemoryEnabled:
     case GetInfoType::PersonalMmHeapUsage:
     case GetInfoType::TitleId:
@@ -806,8 +805,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
             *result = process->VMManager().GetTotalMemoryUsage();
             return RESULT_SUCCESS;
 
-        case GetInfoType::TotalHeapUsage:
-            *result = process->VMManager().GetTotalHeapUsage();
+        case GetInfoType::TotalPhysicalMemoryUsed:
+            *result = process->GetTotalPhysicalMemoryUsed();
             return RESULT_SUCCESS;
 
         case GetInfoType::IsVirtualAddressMemoryEnabled:
@@ -1141,7 +1140,7 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
         return ERR_INVALID_MEMORY_RANGE;
     }
 
-    return shared_memory->Unmap(*current_process, addr);
+    return shared_memory->Unmap(*current_process, addr, size);
 }
 
 static ResultCode QueryProcessMemory(VAddr memory_info_address, VAddr page_info_address,
@@ -1284,10 +1283,14 @@ static ResultCode StartThread(Handle thread_handle) {
 
 /// Called when a thread exits
 static void ExitThread() {
-    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
+    auto& system = Core::System::GetInstance();
 
-    ExitCurrentThread();
-    Core::System::GetInstance().PrepareReschedule();
+    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
+
+    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+    current_thread->Stop();
+    system.CurrentScheduler().RemoveThread(current_thread);
+    system.PrepareReschedule();
 }
 
 /// Sleep the current thread
@@ -1300,32 +1303,32 @@ static void SleepThread(s64 nanoseconds) {
         YieldAndWaitForLoadBalancing = -2,
     };
 
+    auto& system = Core::System::GetInstance();
+    auto& scheduler = system.CurrentScheduler();
+    auto* const current_thread = scheduler.GetCurrentThread();
+
     if (nanoseconds <= 0) {
-        auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
         switch (static_cast<SleepType>(nanoseconds)) {
         case SleepType::YieldWithoutLoadBalancing:
-            scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
+            scheduler.YieldWithoutLoadBalancing(current_thread);
             break;
         case SleepType::YieldWithLoadBalancing:
-            scheduler.YieldWithLoadBalancing(GetCurrentThread());
+            scheduler.YieldWithLoadBalancing(current_thread);
             break;
         case SleepType::YieldAndWaitForLoadBalancing:
-            scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
+            scheduler.YieldAndWaitForLoadBalancing(current_thread);
             break;
         default:
             UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
         }
     } else {
-        // Sleep current thread and check for next thread to schedule
-        WaitCurrentThread_Sleep();
-
-        // Create an event to wake the thread up after the specified nanosecond delay has passed
-        GetCurrentThread()->WakeAfterDelay(nanoseconds);
+        current_thread->Sleep(nanoseconds);
     }
 
     // Reschedule all CPU cores
-    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i)
-        Core::System::GetInstance().CpuCore(i).PrepareReschedule();
+    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
+        system.CpuCore(i).PrepareReschedule();
+    }
 }
 
 /// Wait process wide key atomic
@@ -1336,17 +1339,35 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
         "called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
         mutex_addr, condition_variable_addr, thread_handle, nano_seconds);
 
-    const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+    if (Memory::IsKernelVirtualAddress(mutex_addr)) {
+        LOG_ERROR(
+            Kernel_SVC,
+            "Given mutex address must not be within the kernel address space. address=0x{:016X}",
+            mutex_addr);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!Common::IsWordAligned(mutex_addr)) {
+        LOG_ERROR(Kernel_SVC, "Given mutex address must be word-aligned. address=0x{:016X}",
+                  mutex_addr);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
     SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
     ASSERT(thread);
 
-    CASCADE_CODE(Mutex::Release(mutex_addr));
+    const auto release_result = current_process->GetMutex().Release(mutex_addr);
+    if (release_result.IsError()) {
+        return release_result;
+    }
 
     SharedPtr<Thread> current_thread = GetCurrentThread();
     current_thread->SetCondVarWaitAddress(condition_variable_addr);
     current_thread->SetMutexWaitAddress(mutex_addr);
     current_thread->SetWaitHandle(thread_handle);
-    current_thread->SetStatus(ThreadStatus::WaitMutex);
+    current_thread->SetStatus(ThreadStatus::WaitCondVar);
     current_thread->InvalidateWakeupCallback();
 
     current_thread->WakeAfterDelay(nano_seconds);
@@ -1390,10 +1411,10 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
     // them all.
     std::size_t last = waiting_threads.size();
     if (target != -1)
-        last = target;
+        last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
 
     // If there are no threads waiting on this condition variable, just exit
-    if (last > waiting_threads.size())
+    if (last == 0)
         return RESULT_SUCCESS;
 
     for (std::size_t index = 0; index < last; ++index) {
@@ -1401,6 +1422,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
         ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
 
+        // Clear the condition variable wait address now that this thread is being signaled.
+        thread->SetCondVarWaitAddress(0);
+
         std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
 
         auto& monitor = Core::System::GetInstance().Monitor();
@@ -1419,10 +1443,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
             }
         } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(),
                                            thread->GetWaitHandle()));
-
         if (mutex_val == 0) {
             // We were able to acquire the mutex, resume this thread.
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
             thread->ResumeFromWait();
 
             auto* const lock_owner = thread->GetLockOwner();
@@ -1432,8 +1455,8 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
             thread->SetLockOwner(nullptr);
             thread->SetMutexWaitAddress(0);
-            thread->SetCondVarWaitAddress(0);
             thread->SetWaitHandle(0);
+            Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
         } else {
             // Atomically signal that the mutex now has a waiting thread.
             do {
@@ -1452,12 +1475,11 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
             const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
             auto owner = handle_table.Get<Thread>(owner_handle);
             ASSERT(owner);
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
+            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
             thread->InvalidateWakeupCallback();
+            thread->SetStatus(ThreadStatus::WaitMutex);
 
             owner->AddMutexWaiter(thread);
-
-            Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule();
         }
     }
 
@@ -1577,14 +1599,121 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
     }
 
     auto& kernel = Core::System::GetInstance().Kernel();
-    auto process = kernel.CurrentProcess();
-    auto& handle_table = process->GetHandleTable();
-    const auto shared_mem_handle = SharedMemory::Create(kernel, process, size, perms, perms, addr);
+    auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
 
-    CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
+    auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
+    const auto result = handle_table.Create(std::move(transfer_mem_handle));
+    if (result.Failed()) {
+        return result.Code();
+    }
+
+    *handle = *result;
     return RESULT_SUCCESS;
 }
 
+static ResultCode MapTransferMemory(Handle handle, VAddr address, u64 size, u32 permission_raw) {
+    LOG_DEBUG(Kernel_SVC,
+              "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}, permissions=0x{:08X}",
+              handle, address, size, permission_raw);
+
+    if (!Common::Is4KBAligned(address)) {
+        LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (address=0x{:016X}).",
+                  address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
+                  size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size overflows the 64-bit range (address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto permissions = static_cast<MemoryPermission>(permission_raw);
+    if (permissions != MemoryPermission::None && permissions != MemoryPermission::Read &&
+        permissions != MemoryPermission::ReadWrite) {
+        LOG_ERROR(Kernel_SVC, "Invalid transfer memory permissions given (permissions=0x{:08X}).",
+                  permission_raw);
+        return ERR_INVALID_STATE;
+    }
+
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto* const current_process = kernel.CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
+
+    auto transfer_memory = handle_table.Get<TransferMemory>(handle);
+    if (!transfer_memory) {
+        LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
+                  handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size don't fully fit within the ASLR region "
+                  "(address=0x{:016X}, size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return transfer_memory->MapMemory(address, size, permissions);
+}
+
+static ResultCode UnmapTransferMemory(Handle handle, VAddr address, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called. handle=0x{:08X}, address=0x{:016X}, size=0x{:016X}", handle,
+              address, size);
+
+    if (!Common::Is4KBAligned(address)) {
+        LOG_ERROR(Kernel_SVC, "Transfer memory addresses must be 4KB aligned (address=0x{:016X}).",
+                  address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Transfer memory sizes must be 4KB aligned and not be zero (size=0x{:016X}).",
+                  size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size overflows the 64-bit range (address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto* const current_process = kernel.CurrentProcess();
+    const auto& handle_table = current_process->GetHandleTable();
+
+    auto transfer_memory = handle_table.Get<TransferMemory>(handle);
+    if (!transfer_memory) {
+        LOG_ERROR(Kernel_SVC, "Nonexistent transfer memory handle given (handle=0x{:08X}).",
+                  handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    if (!current_process->VMManager().IsWithinASLRRegion(address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Given address and size don't fully fit within the ASLR region "
+                  "(address=0x{:016X}, size=0x{:016X}).",
+                  address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return transfer_memory->UnmapMemory(address, size);
+}
+
 static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
     LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
 
@@ -1868,6 +1997,83 @@ static ResultCode SetResourceLimitLimitValue(Handle resource_limit, u32 resource
     return RESULT_SUCCESS;
 }
 
+static ResultCode GetProcessList(u32* out_num_processes, VAddr out_process_ids,
+                                 u32 out_process_ids_size) {
+    LOG_DEBUG(Kernel_SVC, "called. out_process_ids=0x{:016X}, out_process_ids_size={}",
+              out_process_ids, out_process_ids_size);
+
+    // Bail if the size exceeds INT32_MAX / sizeof(u64) (0x0FFFFFFF) or is negative as an s32.
+    if ((out_process_ids_size & 0xF0000000) != 0) {
+        LOG_ERROR(Kernel_SVC,
+                  "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
+                  out_process_ids_size);
+        return ERR_OUT_OF_RANGE;
+    }
+
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    const auto& vm_manager = kernel.CurrentProcess()->VMManager();
+    const auto total_copy_size = out_process_ids_size * sizeof(u64);
+
+    if (out_process_ids_size > 0 &&
+        !vm_manager.IsWithinAddressSpace(out_process_ids, total_copy_size)) {
+        LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
+                  out_process_ids, out_process_ids + total_copy_size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& process_list = kernel.GetProcessList();
+    const auto num_processes = process_list.size();
+    const auto copy_amount = std::min(std::size_t{out_process_ids_size}, num_processes);
+
+    for (std::size_t i = 0; i < copy_amount; ++i) {
+        Memory::Write64(out_process_ids, process_list[i]->GetProcessID());
+        out_process_ids += sizeof(u64);
+    }
+
+    *out_num_processes = static_cast<u32>(num_processes); // Total count, not the amount copied.
+    return RESULT_SUCCESS;
+}
+
+static ResultCode GetThreadList(u32* out_num_threads, VAddr out_thread_ids,
+                                u32 out_thread_ids_size, Handle debug_handle) {
+    // TODO: Handle this case when debug events are supported.
+    UNIMPLEMENTED_IF(debug_handle != InvalidHandle);
+
+    LOG_DEBUG(Kernel_SVC, "called. out_thread_ids=0x{:016X}, out_thread_ids_size={}",
+              out_thread_ids, out_thread_ids_size);
+
+    // If the size is negative or larger than INT32_MAX / sizeof(u64), bail.
+    if ((out_thread_ids_size & 0xF0000000) != 0) {
+        LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
+                  out_thread_ids_size);
+        return ERR_OUT_OF_RANGE;
+    }
+
+    const auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
+    const auto& vm_manager = current_process->VMManager();
+    const auto total_copy_size = out_thread_ids_size * sizeof(u64);
+
+    if (out_thread_ids_size > 0 &&
+        !vm_manager.IsWithinAddressSpace(out_thread_ids, total_copy_size)) {
+        LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
+                  out_thread_ids, out_thread_ids + total_copy_size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& thread_list = current_process->GetThreadList();
+    const auto num_threads = thread_list.size();
+    const auto copy_amount = std::min(std::size_t{out_thread_ids_size}, num_threads);
+
+    auto list_iter = thread_list.cbegin();
+    for (std::size_t i = 0; i < copy_amount; ++i, ++list_iter) {
+        Memory::Write64(out_thread_ids, (*list_iter)->GetThreadID());
+        out_thread_ids += sizeof(u64);
+    }
+
+    *out_num_threads = static_cast<u32>(num_threads);
+    return RESULT_SUCCESS;
+}
+
 namespace {
 struct FunctionDef {
     using Func = void();
@@ -1960,8 +2166,8 @@ static const FunctionDef SVC_Table[] = {
     {0x4E, nullptr, "ReadWriteRegister"},
     {0x4F, nullptr, "SetProcessActivity"},
     {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"},
-    {0x51, nullptr, "MapTransferMemory"},
-    {0x52, nullptr, "UnmapTransferMemory"},
+    {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"},
+    {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"},
     {0x53, nullptr, "CreateInterruptEvent"},
     {0x54, nullptr, "QueryPhysicalAddress"},
     {0x55, nullptr, "QueryIoMapping"},
@@ -1980,8 +2186,8 @@ static const FunctionDef SVC_Table[] = {
     {0x62, nullptr, "TerminateDebugProcess"},
     {0x63, nullptr, "GetDebugEvent"},
     {0x64, nullptr, "ContinueDebugEvent"},
-    {0x65, nullptr, "GetProcessList"},
-    {0x66, nullptr, "GetThreadList"},
+    {0x65, SvcWrap<GetProcessList>, "GetProcessList"},
+    {0x66, SvcWrap<GetThreadList>, "GetThreadList"},
     {0x67, nullptr, "GetDebugThreadContext"},
     {0x68, nullptr, "SetDebugThreadContext"},
     {0x69, nullptr, "QueryDebugProcessMemory"},
@@ -2023,7 +2229,7 @@ void CallSVC(u32 immediate) {
     MICROPROFILE_SCOPE(Kernel_SVC);
 
     // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
 
     const FunctionDef* info = GetSVCInfo(immediate);
     if (info) {
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 2a2c2c5ea..b3733680f 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -78,6 +78,14 @@ void SvcWrap() {
     FuncReturn(retval);
 }
 
+template <ResultCode func(u32*, u64, u32)>
+void SvcWrap() {
+    u32 param_1 = 0; // Passed to func by pointer; copied into register 1 afterwards.
+    const u32 retval = func(&param_1, Param(1), static_cast<u32>(Param(2))).raw;
+    Core::CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(retval); // Hands the raw result code back via FuncReturn.
+}
+
 template <ResultCode func(u64*, u32)>
 void SvcWrap() {
     u64 param_1 = 0;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index eb54d6651..1b891f632 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -7,8 +7,6 @@
 #include <optional>
 #include <vector>
 
-#include <boost/range/algorithm_ext/erase.hpp>
-
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
@@ -30,7 +28,7 @@
 
 namespace Kernel {
 
-bool Thread::ShouldWait(Thread* thread) const {
+bool Thread::ShouldWait(const Thread* thread) const {
     return status != ThreadStatus::Dead;
 }
 
@@ -64,21 +62,12 @@ void Thread::Stop() {
     }
     wait_objects.clear();
 
+    owner_process->UnregisterThread(this);
+
     // Mark the TLS slot in the thread's page as free.
     owner_process->FreeTLSSlot(tls_address);
 }
 
-void WaitCurrentThread_Sleep() {
-    Thread* thread = GetCurrentThread();
-    thread->SetStatus(ThreadStatus::WaitSleep);
-}
-
-void ExitCurrentThread() {
-    Thread* thread = GetCurrentThread();
-    thread->Stop();
-    Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
-}
-
 void Thread::WakeAfterDelay(s64 nanoseconds) {
     // Don't schedule a wakeup if the thread wants to wait forever
     if (nanoseconds == -1)
@@ -118,6 +107,7 @@ void Thread::ResumeFromWait() {
     case ThreadStatus::WaitSleep:
     case ThreadStatus::WaitIPC:
     case ThreadStatus::WaitMutex:
+    case ThreadStatus::WaitCondVar:
     case ThreadStatus::WaitArb:
         break;
 
@@ -211,9 +201,11 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
     thread->owner_process = &owner_process;
     thread->scheduler = &system.Scheduler(processor_id);
-    thread->scheduler->AddThread(thread, priority);
+    thread->scheduler->AddThread(thread);
     thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
 
+    thread->owner_process->RegisterThread(thread.get());
+
     // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
     // to initialize the context
     ResetThreadContext(thread->context, stack_top, entry_point, arg);
@@ -241,16 +233,16 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
     context.cpu_registers[1] = output;
 }
 
-s32 Thread::GetWaitObjectIndex(WaitObject* object) const {
+s32 Thread::GetWaitObjectIndex(const WaitObject* object) const {
     ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
-    auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
+    const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
     return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
 }
 
 VAddr Thread::GetCommandBufferAddress() const {
     // Offset from the start of TLS at which the IPC command buffer begins.
-    static constexpr int CommandHeaderOffset = 0x80;
-    return GetTLSAddress() + CommandHeaderOffset;
+    constexpr u64 command_header_offset = 0x80;
+    return GetTLSAddress() + command_header_offset;
 }
 
 void Thread::SetStatus(ThreadStatus new_status) {
@@ -269,8 +261,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
     if (thread->lock_owner == this) {
         // If the thread is already waiting for this thread to release the mutex, ensure that the
         // waiters list is consistent and return without doing anything.
-        auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-        ASSERT(itr != wait_mutex_threads.end());
+        const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+        ASSERT(iter != wait_mutex_threads.end());
         return;
     }
 
@@ -278,11 +270,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
     ASSERT(thread->lock_owner == nullptr);
 
     // Ensure that the thread is not already in the list of mutex waiters
-    auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(itr == wait_mutex_threads.end());
-
+    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+    ASSERT(iter == wait_mutex_threads.end());
+
+    // Keep the list in an ordered fashion
+    const auto insertion_point = std::find_if(
+        wait_mutex_threads.begin(), wait_mutex_threads.end(),
+        [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
+    wait_mutex_threads.insert(insertion_point, thread);
     thread->lock_owner = this;
-    wait_mutex_threads.emplace_back(std::move(thread));
+
     UpdatePriority();
 }
 
@@ -290,32 +287,44 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
     ASSERT(thread->lock_owner == this);
 
     // Ensure that the thread is in the list of mutex waiters
-    auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
-    ASSERT(itr != wait_mutex_threads.end());
+    const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+    ASSERT(iter != wait_mutex_threads.end());
+
+    wait_mutex_threads.erase(iter);
 
-    boost::remove_erase(wait_mutex_threads, thread);
     thread->lock_owner = nullptr;
     UpdatePriority();
 }
 
 void Thread::UpdatePriority() {
-    // Find the highest priority among all the threads that are waiting for this thread's lock
+    // If any of the threads waiting on the mutex have a higher priority
+    // (taking into account priority inheritance), then this thread inherits
+    // that thread's priority.
     u32 new_priority = nominal_priority;
-    for (const auto& thread : wait_mutex_threads) {
-        if (thread->nominal_priority < new_priority)
-            new_priority = thread->nominal_priority;
+    if (!wait_mutex_threads.empty()) {
+        if (wait_mutex_threads.front()->current_priority < new_priority) {
+            new_priority = wait_mutex_threads.front()->current_priority;
+        }
     }
 
-    if (new_priority == current_priority)
+    if (new_priority == current_priority) {
         return;
+    }
 
     scheduler->SetThreadPriority(this, new_priority);
-
     current_priority = new_priority;
 
+    if (!lock_owner) {
+        return;
+    }
+
+    // Ensure that the thread is within the correct location in the waiting list.
+    auto old_owner = lock_owner;
+    lock_owner->RemoveMutexWaiter(this);
+    old_owner->AddMutexWaiter(this);
+
     // Recursively update the priority of the thread that depends on the priority of this one.
-    if (lock_owner)
-        lock_owner->UpdatePriority();
+    lock_owner->UpdatePriority();
 }
 
 void Thread::ChangeCore(u32 core, u64 mask) {
@@ -347,7 +356,7 @@ void Thread::ChangeScheduler() {
     if (*new_processor_id != processor_id) {
         // Remove thread from previous core's scheduler
         scheduler->RemoveThread(this);
-        next_scheduler.AddThread(this, current_priority);
+        next_scheduler.AddThread(this);
     }
 
     processor_id = *new_processor_id;
@@ -362,7 +371,7 @@ void Thread::ChangeScheduler() {
     system.CpuCore(processor_id).PrepareReschedule();
 }
 
-bool Thread::AllWaitObjectsReady() {
+bool Thread::AllWaitObjectsReady() const {
     return std::none_of(
         wait_objects.begin(), wait_objects.end(),
         [this](const SharedPtr<WaitObject>& object) { return object->ShouldWait(this); });
@@ -391,6 +400,14 @@ void Thread::SetActivity(ThreadActivity value) {
     }
 }
 
+void Thread::Sleep(s64 nanoseconds) {
+    // Sleep current thread and check for next thread to schedule
+    SetStatus(ThreadStatus::WaitSleep);
+
+    // Create an event to wake the thread up after the specified nanosecond delay has passed
+    WakeAfterDelay(nanoseconds);
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c48b21aba..73e5d1bb4 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -51,7 +51,8 @@ enum class ThreadStatus {
     WaitIPC,      ///< Waiting for the reply from an IPC request
     WaitSynchAny, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
     WaitSynchAll, ///< Waiting due to WaitSynchronizationN with wait_all = true
-    WaitMutex,    ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc
+    WaitMutex,    ///< Waiting due to an ArbitrateLock svc
+    WaitCondVar,  ///< Waiting due to a WaitProcessWideKey svc
     WaitArb,      ///< Waiting due to a SignalToAddress/WaitForAddress svc
     Dormant,      ///< Created but not yet made ready
     Dead          ///< Run to completion, or forcefully terminated
@@ -110,7 +111,7 @@ public:
         return HANDLE_TYPE;
     }
 
-    bool ShouldWait(Thread* thread) const override;
+    bool ShouldWait(const Thread* thread) const override;
     void Acquire(Thread* thread) override;
 
     /**
@@ -204,7 +205,7 @@ public:
      * object in the list.
      * @param object Object to query the index of.
      */
-    s32 GetWaitObjectIndex(WaitObject* object) const;
+    s32 GetWaitObjectIndex(const WaitObject* object) const;
 
     /**
      * Stops a thread, invalidating it from further use
@@ -298,7 +299,7 @@ public:
     }
 
     /// Determines whether all the objects this thread is waiting on are ready.
-    bool AllWaitObjectsReady();
+    bool AllWaitObjectsReady() const;
 
     const MutexWaitingThreads& GetMutexWaitingThreads() const {
         return wait_mutex_threads;
@@ -383,6 +384,9 @@ public:
 
     void SetActivity(ThreadActivity value);
 
+    /// Sleeps this thread for the given amount of nanoseconds.
+    void Sleep(s64 nanoseconds);
+
 private:
     explicit Thread(KernelCore& kernel);
     ~Thread() override;
@@ -398,8 +402,14 @@ private:
     VAddr entry_point = 0;
     VAddr stack_top = 0;
 
-    u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application
-    u32 current_priority = 0; ///< Current thread priority, can be temporarily changed
+    /// Nominal thread priority, as set by the emulated application.
+    /// The nominal priority is the thread priority without priority
+    /// inheritance taken into account.
+    u32 nominal_priority = 0;
+
+    /// Current thread priority. This may change over the course of the
+    /// thread's lifetime in order to facilitate priority inheritance.
+    u32 current_priority = 0;
 
     u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
     u64 last_running_ticks = 0;   ///< CPU tick when thread was last running
@@ -460,14 +470,4 @@ private:
  */
 Thread* GetCurrentThread();
 
-/**
- * Waits the current thread on a sleep
- */
-void WaitCurrentThread_Sleep();
-
-/**
- * Stops the current thread and removes it from the thread_list
- */
-void ExitCurrentThread();
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
new file mode 100644
index 000000000..26c4e5e67
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -0,0 +1,81 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/transfer_memory.h"
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
+TransferMemory::~TransferMemory() = default;
+
+SharedPtr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address, u64 size,
+                                                 MemoryPermission permissions) {
+    SharedPtr<TransferMemory> transfer_memory{new TransferMemory(kernel)};
+
+    transfer_memory->base_address = base_address;
+    transfer_memory->memory_size = size;
+    transfer_memory->owner_permissions = permissions;
+    transfer_memory->owner_process = kernel.CurrentProcess();
+
+    return transfer_memory;
+}
+
+const u8* TransferMemory::GetPointer() const {
+    return backing_block.get()->data();
+}
+
+u64 TransferMemory::GetSize() const {
+    return memory_size;
+}
+
+ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission permissions) {
+    if (memory_size != size) {
+        return ERR_INVALID_SIZE;
+    }
+
+    if (owner_permissions != permissions) {
+        return ERR_INVALID_STATE;
+    }
+
+    if (is_mapped) {
+        return ERR_INVALID_STATE;
+    }
+
+    backing_block = std::make_shared<std::vector<u8>>(size);
+
+    const auto map_state = owner_permissions == MemoryPermission::None
+                               ? MemoryState::TransferMemoryIsolated
+                               : MemoryState::TransferMemory;
+    auto& vm_manager = owner_process->VMManager();
+    const auto map_result = vm_manager.MapMemoryBlock(address, backing_block, 0, size, map_state);
+    if (map_result.Failed()) {
+        return map_result.Code();
+    }
+
+    is_mapped = true;
+    return RESULT_SUCCESS;
+}
+
+ResultCode TransferMemory::UnmapMemory(VAddr address, u64 size) {
+    if (memory_size != size) {
+        return ERR_INVALID_SIZE;
+    }
+
+    auto& vm_manager = owner_process->VMManager();
+    const auto result = vm_manager.UnmapRange(address, size);
+
+    if (result.IsError()) {
+        return result;
+    }
+
+    is_mapped = false;
+    return RESULT_SUCCESS;
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
new file mode 100644
index 000000000..a140b1e2b
--- /dev/null
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -0,0 +1,103 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "core/hle/kernel/object.h"
+
+union ResultCode;
+
+namespace Kernel {
+
+class KernelCore;
+class Process;
+
+enum class MemoryPermission : u32;
+
+/// Defines the interface for transfer memory objects.
+///
+/// Transfer memory is typically used for the purpose of
+/// transferring memory between separate process instances,
+/// thus the name.
+///
+class TransferMemory final : public Object {
+public:
+    static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;
+
+    static SharedPtr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, u64 size,
+                                            MemoryPermission permissions);
+
+    TransferMemory(const TransferMemory&) = delete;
+    TransferMemory& operator=(const TransferMemory&) = delete;
+
+    TransferMemory(TransferMemory&&) = delete;
+    TransferMemory& operator=(TransferMemory&&) = delete;
+
+    std::string GetTypeName() const override {
+        return "TransferMemory";
+    }
+
+    std::string GetName() const override {
+        return GetTypeName();
+    }
+
+    HandleType GetHandleType() const override {
+        return HANDLE_TYPE;
+    }
+
+    /// Gets a pointer to the backing block of this instance.
+    const u8* GetPointer() const;
+
+    /// Gets the size of the memory backing this instance in bytes.
+    u64 GetSize() const;
+
+    /// Attempts to map transfer memory with the given range and memory permissions.
+    ///
+    /// @param address     The base address to begin mapping memory at.
+    /// @param size        The size of the memory to map, in bytes.
+    /// @param permissions The memory permissions to check against when mapping memory.
+    ///
+    /// @pre The given address, size, and memory permissions must all match
+    ///      the same values that were given when creating the transfer memory
+    ///      instance.
+    ///
+    ResultCode MapMemory(VAddr address, u64 size, MemoryPermission permissions);
+
+    /// Unmaps the transfer memory with the given range.
+    ///
+    /// @param address The base address to begin unmapping memory at.
+    /// @param size    The size of the memory to unmap, in bytes.
+    ///
+    /// @pre The given address and size must be the same as the ones used
+    ///      to create the transfer memory instance.
+    ///
+    ResultCode UnmapMemory(VAddr address, u64 size);
+
+private:
+    explicit TransferMemory(KernelCore& kernel);
+    ~TransferMemory() override;
+
+    /// Memory block backing this instance.
+    std::shared_ptr<std::vector<u8>> backing_block;
+
+    /// The base address for the memory managed by this instance.
+    VAddr base_address = 0;
+
+    /// Size of the memory, in bytes, that this instance manages.
+    u64 memory_size = 0;
+
+    /// The memory permissions that are applied to this instance.
+    MemoryPermission owner_permissions{};
+
+    /// The process that this transfer memory instance was created under.
+    Process* owner_process = nullptr;
+
+    /// Whether or not this transfer memory instance has mapped memory.
+    bool is_mapped = false;
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 05c59af34..ec0a480ce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,29 +7,29 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/memory_hook.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"
 #include "core/memory_setup.h"
 
 namespace Kernel {
 namespace {
 const char* GetMemoryStateName(MemoryState state) {
     static constexpr const char* names[] = {
-        "Unmapped",         "Io",
-        "Normal",           "CodeStatic",
-        "CodeMutable",      "Heap",
-        "Shared",           "Unknown1",
-        "ModuleCodeStatic", "ModuleCodeMutable",
-        "IpcBuffer0",       "Stack",
-        "ThreadLocal",      "TransferMemoryIsolated",
-        "TransferMemory",   "ProcessMemory",
-        "Inaccessible",     "IpcBuffer1",
-        "IpcBuffer3",       "KernelStack",
+        "Unmapped",       "Io",
+        "Normal",         "Code",
+        "CodeData",       "Heap",
+        "Shared",         "Unknown1",
+        "ModuleCode",     "ModuleCodeData",
+        "IpcBuffer0",     "Stack",
+        "ThreadLocal",    "TransferMemoryIsolated",
+        "TransferMemory", "ProcessMemory",
+        "Inaccessible",   "IpcBuffer1",
+        "IpcBuffer3",     "KernelStack",
     };
 
     return names[ToSvcMemoryState(state)];
@@ -177,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
 
 ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
                                                    MemoryState state,
-                                                   Memory::MemoryHookPointer mmio_handler) {
+                                                   Common::MemoryHookPointer mmio_handler) {
     // This is the appropriately sized VMA that will turn into our allocation.
     CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
     VirtualMemoryArea& final_vma = vma_handle->second;
@@ -256,57 +256,50 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
     return RESULT_SUCCESS;
 }
 
-ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
-    if (!IsWithinHeapRegion(target, size)) {
-        return ERR_INVALID_ADDRESS;
+ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
+    if (size > GetHeapRegionSize()) {
+        return ERR_OUT_OF_MEMORY;
+    }
+
+    // No need to do any additional work if the heap is already the given size.
+    if (size == GetCurrentHeapSize()) {
+        return MakeResult(heap_region_base);
     }
 
     if (heap_memory == nullptr) {
         // Initialize heap
-        heap_memory = std::make_shared<std::vector<u8>>();
-        heap_start = heap_end = target;
+        heap_memory = std::make_shared<std::vector<u8>>(size);
+        heap_end = heap_region_base + size;
     } else {
-        UnmapRange(heap_start, heap_end - heap_start);
-    }
-
-    // If necessary, expand backing vector to cover new heap extents.
-    if (target < heap_start) {
-        heap_memory->insert(begin(*heap_memory), heap_start - target, 0);
-        heap_start = target;
-        RefreshMemoryBlockMappings(heap_memory.get());
-    }
-    if (target + size > heap_end) {
-        heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0);
-        heap_end = target + size;
-        RefreshMemoryBlockMappings(heap_memory.get());
+        UnmapRange(heap_region_base, GetCurrentHeapSize());
     }
-    ASSERT(heap_end - heap_start == heap_memory->size());
 
-    CASCADE_RESULT(auto vma, MapMemoryBlock(target, heap_memory, target - heap_start, size,
-                                            MemoryState::Heap));
-    Reprotect(vma, perms);
+    // If necessary, expand backing vector to cover new heap extents in
+    // the case of allocating. Otherwise, shrink the backing memory,
+    // if a smaller heap has been requested.
+    const u64 old_heap_size = GetCurrentHeapSize();
+    if (size > old_heap_size) {
+        const u64 alloc_size = size - old_heap_size;
 
-    heap_used = size;
-
-    return MakeResult<VAddr>(heap_end - size);
-}
+        heap_memory->insert(heap_memory->end(), alloc_size, 0);
+        RefreshMemoryBlockMappings(heap_memory.get());
+    } else if (size < old_heap_size) {
+        heap_memory->resize(size);
+        heap_memory->shrink_to_fit();
 
-ResultCode VMManager::HeapFree(VAddr target, u64 size) {
-    if (!IsWithinHeapRegion(target, size)) {
-        return ERR_INVALID_ADDRESS;
+        RefreshMemoryBlockMappings(heap_memory.get());
     }
 
-    if (size == 0) {
-        return RESULT_SUCCESS;
-    }
+    heap_end = heap_region_base + size;
+    ASSERT(GetCurrentHeapSize() == heap_memory->size());
 
-    const ResultCode result = UnmapRange(target, size);
-    if (result.IsError()) {
-        return result;
+    const auto mapping_result =
+        MapMemoryBlock(heap_region_base, heap_memory, 0, size, MemoryState::Heap);
+    if (mapping_result.Failed()) {
+        return mapping_result.Code();
     }
 
-    heap_used -= size;
-    return RESULT_SUCCESS;
+    return MakeResult<VAddr>(heap_region_base);
 }
 
 MemoryInfo VMManager::QueryMemory(VAddr address) const {
@@ -598,6 +591,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
 
     heap_region_base = map_region_end;
     heap_region_end = heap_region_base + heap_region_size;
+    heap_end = heap_region_base;
 
     new_map_region_base = heap_region_end;
     new_map_region_end = new_map_region_base + new_map_region_size;
@@ -624,7 +618,7 @@ void VMManager::ClearPageTable() {
     std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
     page_table.special_regions.clear();
     std::fill(page_table.attributes.begin(), page_table.attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);
 }
 
 VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
@@ -692,10 +686,6 @@ u64 VMManager::GetTotalMemoryUsage() const {
     return 0xF8000000;
 }
 
-u64 VMManager::GetTotalHeapUsage() const {
-    return heap_used;
-}
-
 VAddr VMManager::GetAddressSpaceBaseAddress() const {
     return address_space_base;
 }
@@ -778,6 +768,10 @@ u64 VMManager::GetHeapRegionSize() const {
     return heap_region_end - heap_region_base;
 }
 
+u64 VMManager::GetCurrentHeapSize() const {
+    return heap_end - heap_region_base;
+}
+
 bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
     return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
                                 GetHeapRegionEndAddress());
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 88e0b3c02..6f484b7bf 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
 #include <tuple>
 #include <vector>
 #include "common/common_types.h"
+#include "common/memory_hook.h"
+#include "common/page_table.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"
 
 namespace FileSys {
 enum class ProgramAddressSpaceType : u8;
@@ -164,12 +165,12 @@ enum class MemoryState : u32 {
     Unmapped               = 0x00,
     Io                     = 0x01 | FlagMapped,
     Normal                 = 0x02 | FlagMapped | FlagQueryPhysicalAddressAllowed,
-    CodeStatic             = 0x03 | CodeFlags  | FlagMapProcess,
-    CodeMutable            = 0x04 | CodeFlags  | FlagMapProcess | FlagCodeMemory,
+    Code                   = 0x03 | CodeFlags  | FlagMapProcess,
+    CodeData               = 0x04 | DataFlags  | FlagMapProcess | FlagCodeMemory,
     Heap                   = 0x05 | DataFlags  | FlagCodeMemory,
     Shared                 = 0x06 | FlagMapped | FlagMemoryPoolAllocated,
-    ModuleCodeStatic       = 0x08 | CodeFlags  | FlagModule | FlagMapProcess,
-    ModuleCodeMutable      = 0x09 | DataFlags  | FlagModule | FlagMapProcess | FlagCodeMemory,
+    ModuleCode             = 0x08 | CodeFlags  | FlagModule | FlagMapProcess,
+    ModuleCodeData         = 0x09 | DataFlags  | FlagModule | FlagMapProcess | FlagCodeMemory,
 
     IpcBuffer0             = 0x0A | FlagMapped | FlagQueryPhysicalAddressAllowed | FlagMemoryPoolAllocated |
                                     IPCFlags | FlagSharedDevice | FlagSharedDeviceAligned,
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
     // Settings for type = MMIO
     /// Physical address of the register area this VMA maps to.
     PAddr paddr = 0;
-    Memory::MemoryHookPointer mmio_handler = nullptr;
+    Common::MemoryHookPointer mmio_handler = nullptr;
 
     /// Tests if this area can be merged to the right with `next`.
     bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
      * @param mmio_handler The handler that will implement read and write for this MMIO region.
      */
     ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
-                                 Memory::MemoryHookPointer mmio_handler);
+                                 Common::MemoryHookPointer mmio_handler);
 
     /// Unmaps a range of addresses, splitting VMAs as necessary.
     ResultCode UnmapRange(VAddr target, u64 size);
@@ -379,11 +380,41 @@ public:
     /// Changes the permissions of a range of addresses, splitting VMAs as necessary.
     ResultCode ReprotectRange(VAddr target, u64 size, VMAPermission new_perms);
 
-    ResultVal<VAddr> HeapAllocate(VAddr target, u64 size, VMAPermission perms);
-    ResultCode HeapFree(VAddr target, u64 size);
-
     ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, MemoryState state);
 
+    /// Attempts to allocate a heap with the given size.
+    ///
+    /// @param size The size of the heap to allocate in bytes.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size that is equal to the size of the current heap,
+    ///       then this function will do nothing and return the current
+    ///       heap's starting address, as there's no need to perform
+    ///       any additional heap allocation work.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size less than the current heap's size, then
+    ///       this function will attempt to shrink the heap.
+    ///
+    /// @note If a heap is currently allocated, and this is called
+    ///       with a size larger than the current heap's size, then
+    ///       this function will attempt to extend the size of the heap.
+    ///
+    /// @returns A result indicating either success or failure.
+    ///          <p>
+    ///          If successful, this function will return a result
+    ///          containing the starting address to the allocated heap.
+    ///          <p>
+    ///          If unsuccessful, this function will return a result
+    ///          containing an error code.
+    ///
+    /// @pre The given size must lie within the allowable heap
+    ///      memory region managed by this VMManager instance.
+    ///      Failure to abide by this will result in ERR_OUT_OF_MEMORY
+    ///      being returned as the result.
+    ///
+    ResultVal<VAddr> SetHeapSize(u64 size);
+
     /// Queries the memory manager for information about the given address.
     ///
     /// @param address The address to query the memory manager about for information.
@@ -417,9 +448,6 @@ public:
     /// Gets the total memory usage, used by svcGetInfo
     u64 GetTotalMemoryUsage() const;
 
-    /// Gets the total heap usage, used by svcGetInfo
-    u64 GetTotalHeapUsage() const;
-
     /// Gets the address space base address
     VAddr GetAddressSpaceBaseAddress() const;
 
@@ -468,6 +496,13 @@ public:
     /// Gets the total size of the heap region in bytes.
     u64 GetHeapRegionSize() const;
 
+    /// Gets the total size of the current heap in bytes.
+    ///
+    /// @note This is the current allocated heap size, not the size
+    ///       of the region it's allowed to exist within.
+    ///
+    u64 GetCurrentHeapSize() const;
+
     /// Determines whether or not the specified range is within the heap region.
     bool IsWithinHeapRegion(VAddr address, u64 size) const;
 
@@ -509,7 +544,7 @@ public:
 
     /// Each VMManager has its own page table, which is set as the main one when the owning process
     /// is scheduled.
-    Memory::PageTable page_table;
+    Common::PageTable page_table{Memory::PAGE_BITS};
 
 private:
     using VMAIter = VMAMap::iterator;
@@ -624,9 +659,9 @@ private:
     // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
     // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
     std::shared_ptr<std::vector<u8>> heap_memory;
-    // The left/right bounds of the address space covered by heap_memory.
-    VAddr heap_start = 0;
+
+    // The end of the currently allocated heap. This is not an inclusive
+    // end of the range. This is essentially 'base_address + current_size'.
     VAddr heap_end = 0;
-    u64 heap_used = 0;
 };
 } // namespace Kernel
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h
index 5987fb971..04464a51a 100644
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -24,7 +24,7 @@ public:
      * @param thread The thread about which we're deciding.
      * @return True if the current thread should wait due to this object being unavailable
      */
-    virtual bool ShouldWait(Thread* thread) const = 0;
+    virtual bool ShouldWait(const Thread* thread) const = 0;
 
     /// Acquire/lock the object for the specified thread if it is available
     virtual void Acquire(Thread* thread) = 0;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index ab84f5ddc..8a3701151 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -119,10 +119,6 @@ union ResultCode {
     BitField<0, 9, ErrorModule> module;
     BitField<9, 13, u32> description;
 
-    // The last bit of `level` is checked by apps and the kernel to determine if a result code is an
-    // error
-    BitField<31, 1, u32> is_error;
-
     constexpr explicit ResultCode(u32 raw) : raw(raw) {}
 
     constexpr ResultCode(ErrorModule module_, u32 description_)
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 3f009d2b7..85271d418 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -2,10 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <array>
 #include <cinttypes>
 #include <cstring>
-#include <stack>
 #include "audio_core/audio_renderer.h"
 #include "core/core.h"
 #include "core/file_sys/savedata_factory.h"
@@ -13,7 +13,7 @@
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/acc/profile_manager.h"
 #include "core/hle/service/am/am.h"
@@ -93,38 +93,84 @@ void IWindowController::AcquireForegroundRights(Kernel::HLERequestContext& ctx)
 }
 
 IAudioController::IAudioController() : ServiceFramework("IAudioController") {
+    // clang-format off
     static const FunctionInfo functions[] = {
         {0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"},
-        {1, &IAudioController::GetMainAppletExpectedMasterVolume,
-         "GetMainAppletExpectedMasterVolume"},
-        {2, &IAudioController::GetLibraryAppletExpectedMasterVolume,
-         "GetLibraryAppletExpectedMasterVolume"},
-        {3, nullptr, "ChangeMainAppletMasterVolume"},
-        {4, nullptr, "SetTransparentVolumeRate"},
+        {1, &IAudioController::GetMainAppletExpectedMasterVolume, "GetMainAppletExpectedMasterVolume"},
+        {2, &IAudioController::GetLibraryAppletExpectedMasterVolume, "GetLibraryAppletExpectedMasterVolume"},
+        {3, &IAudioController::ChangeMainAppletMasterVolume, "ChangeMainAppletMasterVolume"},
+        {4, &IAudioController::SetTransparentAudioRate, "SetTransparentVolumeRate"},
     };
+    // clang-format on
+
     RegisterHandlers(functions);
 }
 
 IAudioController::~IAudioController() = default;
 
 void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    IPC::RequestParser rp{ctx};
+    const float main_applet_volume_tmp = rp.Pop<float>();
+    const float library_applet_volume_tmp = rp.Pop<float>();
+
+    LOG_DEBUG(Service_AM, "called. main_applet_volume={}, library_applet_volume={}",
+              main_applet_volume_tmp, library_applet_volume_tmp);
+
+    // Ensure the volume values remain within the 0-100% range
+    main_applet_volume = std::clamp(main_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
+    library_applet_volume =
+        std::clamp(library_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
+
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
 void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called. main_applet_volume={}", main_applet_volume);
     IPC::ResponseBuilder rb{ctx, 3};
     rb.Push(RESULT_SUCCESS);
-    rb.Push(volume);
+    rb.Push(main_applet_volume);
 }
 
 void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called. library_applet_volume={}", library_applet_volume);
     IPC::ResponseBuilder rb{ctx, 3};
     rb.Push(RESULT_SUCCESS);
-    rb.Push(volume);
+    rb.Push(library_applet_volume);
+}
+
+void IAudioController::ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx) {
+    struct Parameters {
+        float volume;
+        s64 fade_time_ns;
+    };
+    static_assert(sizeof(Parameters) == 16);
+
+    IPC::RequestParser rp{ctx};
+    const auto parameters = rp.PopRaw<Parameters>();
+
+    LOG_DEBUG(Service_AM, "called. volume={}, fade_time_ns={}", parameters.volume,
+              parameters.fade_time_ns);
+
+    main_applet_volume = std::clamp(parameters.volume, min_allowed_volume, max_allowed_volume);
+    fade_time_ns = std::chrono::nanoseconds{parameters.fade_time_ns};
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+}
+
+void IAudioController::SetTransparentAudioRate(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const float transparent_volume_rate_tmp = rp.Pop<float>();
+
+    LOG_DEBUG(Service_AM, "called. transparent_volume_rate={}", transparent_volume_rate_tmp);
+
+    // Clamp volume range to 0-100%.
+    transparent_volume_rate =
+        std::clamp(transparent_volume_rate_tmp, min_allowed_volume, max_allowed_volume);
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
 }
 
 IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {
@@ -169,7 +215,21 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
 
 IDisplayController::~IDisplayController() = default;
 
-IDebugFunctions::IDebugFunctions() : ServiceFramework("IDebugFunctions") {}
+IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
+    // clang-format off
+    static const FunctionInfo functions[] = {
+        {0, nullptr, "NotifyMessageToHomeMenuForDebug"},
+        {1, nullptr, "OpenMainApplication"},
+        {10, nullptr, "EmulateButtonEvent"},
+        {20, nullptr, "InvalidateTransitionLayer"},
+        {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
+        {40, nullptr, "GetAppletResourceUsageInfo"},
+    };
+    // clang-format on
+
+    RegisterHandlers(functions);
+}
+
 IDebugFunctions::~IDebugFunctions() = default;
 
 ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
@@ -179,8 +239,8 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
         {0, nullptr, "Exit"},
         {1, &ISelfController::LockExit, "LockExit"},
         {2, &ISelfController::UnlockExit, "UnlockExit"},
-        {3, nullptr, "EnterFatalSection"},
-        {4, nullptr, "LeaveFatalSection"},
+        {3, &ISelfController::EnterFatalSection, "EnterFatalSection"},
+        {4, &ISelfController::LeaveFatalSection, "LeaveFatalSection"},
         {9, &ISelfController::GetLibraryAppletLaunchableEvent, "GetLibraryAppletLaunchableEvent"},
         {10, &ISelfController::SetScreenShotPermission, "SetScreenShotPermission"},
         {11, &ISelfController::SetOperationModeChangedNotification, "SetOperationModeChangedNotification"},
@@ -225,41 +285,54 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
 
 ISelfController::~ISelfController() = default;
 
-void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
-    // Takes 3 input u8s with each field located immediately after the previous
-    // u8, these are bool flags. No output.
+void ISelfController::LockExit(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_AM, "(STUBBED) called");
 
-    IPC::RequestParser rp{ctx};
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+}
 
-    struct FocusHandlingModeParams {
-        u8 unknown0;
-        u8 unknown1;
-        u8 unknown2;
-    };
-    auto flags = rp.PopRaw<FocusHandlingModeParams>();
+void ISelfController::UnlockExit(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_AM, "(STUBBED) called");
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::SetRestartMessageEnabled(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+void ISelfController::EnterFatalSection(Kernel::HLERequestContext& ctx) {
+    ++num_fatal_sections_entered;
+    LOG_DEBUG(Service_AM, "called. Num fatal sections entered: {}", num_fatal_sections_entered);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
+void ISelfController::LeaveFatalSection(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_AM, "called.");
 
-    bool flag = rp.Pop<bool>();
-    LOG_WARNING(Service_AM, "(STUBBED) called flag={}", flag);
+    // Entry and exit of fatal sections must be balanced.
+    if (num_fatal_sections_entered == 0) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(ResultCode{ErrorModule::AM, 512});
+        return;
+    }
+
+    --num_fatal_sections_entered;
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
+void ISelfController::GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx) {
+    LOG_WARNING(Service_AM, "(STUBBED) called");
+
+    launchable_event.writable->Signal();
+
+    IPC::ResponseBuilder rb{ctx, 2, 1};
+    rb.Push(RESULT_SUCCESS);
+    rb.PushCopyObjects(launchable_event.readable);
+}
+
 void ISelfController::SetScreenShotPermission(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_AM, "(STUBBED) called");
 
@@ -277,40 +350,52 @@ void ISelfController::SetOperationModeChangedNotification(Kernel::HLERequestCont
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx) {
-    // Takes 3 input u8s with each field located immediately after the previous
-    // u8, these are bool flags. No output.
+void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
 
-    bool enabled = rp.Pop<bool>();
-    LOG_WARNING(Service_AM, "(STUBBED) called enabled={}", enabled);
+    bool flag = rp.Pop<bool>();
+    LOG_WARNING(Service_AM, "(STUBBED) called flag={}", flag);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::LockExit(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
+    // Takes 3 input u8s with each field located immediately after the previous
+    // u8, these are bool flags. No output.
+    IPC::RequestParser rp{ctx};
+
+    struct FocusHandlingModeParams {
+        u8 unknown0;
+        u8 unknown1;
+        u8 unknown2;
+    };
+    const auto flags = rp.PopRaw<FocusHandlingModeParams>();
+
+    LOG_WARNING(Service_AM, "(STUBBED) called. unknown0={}, unknown1={}, unknown2={}",
+                flags.unknown0, flags.unknown1, flags.unknown2);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::UnlockExit(Kernel::HLERequestContext& ctx) {
+void ISelfController::SetRestartMessageEnabled(Kernel::HLERequestContext& ctx) {
     LOG_WARNING(Service_AM, "(STUBBED) called");
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
-void ISelfController::GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx) {
+    // Takes a single bool input (the "enabled" flag popped below).
+    // No output besides the result code.
+    IPC::RequestParser rp{ctx};
 
-    launchable_event.writable->Signal();
+    bool enabled = rp.Pop<bool>();
+    LOG_WARNING(Service_AM, "(STUBBED) called enabled={}", enabled);
 
-    IPC::ResponseBuilder rb{ctx, 2, 1};
+    IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
-    rb.PushCopyObjects(launchable_event.readable);
 }
 
 void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) {
@@ -847,19 +932,19 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
     rp.SetCurrentOffset(3);
     const auto handle{rp.Pop<Kernel::Handle>()};
 
-    const auto shared_mem =
-        Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::SharedMemory>(
+    const auto transfer_mem =
+        Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(
             handle);
 
-    if (shared_mem == nullptr) {
+    if (transfer_mem == nullptr) {
         LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle);
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ResultCode(-1));
         return;
     }
 
-    const u8* mem_begin = shared_mem->GetPointer();
-    const u8* mem_end = mem_begin + shared_mem->GetSize();
+    const u8* const mem_begin = transfer_mem->GetPointer();
+    const u8* const mem_end = mem_begin + transfer_mem->GetSize();
     std::vector<u8> memory{mem_begin, mem_end};
 
     IPC::ResponseBuilder rb{ctx, 2, 0, 1};
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index b6113cfdd..991b7d47c 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <chrono>
 #include <memory>
 #include <queue>
 #include "core/hle/kernel/writable_event.h"
@@ -81,8 +82,21 @@ private:
     void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx);
     void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
     void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
+    void ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx);
+    void SetTransparentAudioRate(Kernel::HLERequestContext& ctx);
 
-    u32 volume{100};
+    static constexpr float min_allowed_volume = 0.0f;
+    static constexpr float max_allowed_volume = 1.0f;
+
+    float main_applet_volume{0.25f};
+    float library_applet_volume{max_allowed_volume};
+    float transparent_volume_rate{min_allowed_volume};
+
+    // Volume transition fade time in nanoseconds.
+    // e.g. If the main applet volume was 0% and was changed to 50%
+    //      with a fade of 50ns, then over the course of 50ns,
+    //      the volume will gradually fade up to 50%
+    std::chrono::nanoseconds fade_time_ns{0};
 };
 
 class IDisplayController final : public ServiceFramework<IDisplayController> {
@@ -103,17 +117,19 @@ public:
     ~ISelfController() override;
 
 private:
-    void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
-    void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
-    void SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx);
-    void SetOperationModeChangedNotification(Kernel::HLERequestContext& ctx);
-    void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
     void LockExit(Kernel::HLERequestContext& ctx);
     void UnlockExit(Kernel::HLERequestContext& ctx);
+    void EnterFatalSection(Kernel::HLERequestContext& ctx);
+    void LeaveFatalSection(Kernel::HLERequestContext& ctx);
     void GetLibraryAppletLaunchableEvent(Kernel::HLERequestContext& ctx);
+    void SetScreenShotPermission(Kernel::HLERequestContext& ctx);
+    void SetOperationModeChangedNotification(Kernel::HLERequestContext& ctx);
+    void SetPerformanceModeChangedNotification(Kernel::HLERequestContext& ctx);
+    void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
+    void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
+    void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
     void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx);
     void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx);
-    void SetScreenShotPermission(Kernel::HLERequestContext& ctx);
     void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
     void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
     void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
@@ -121,6 +137,7 @@ private:
     std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
     Kernel::EventPair launchable_event;
     u32 idle_time_detection_extension = 0;
+    u64 num_fatal_sections_entered = 0;
 };
 
 class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> {
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index 088410564..e5daefdde 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -2,9 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "common/logging/log.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/service/audio/audin_u.h"
 
 namespace Service::Audio {
@@ -33,7 +30,6 @@ public:
 
         RegisterHandlers(functions);
     }
-    ~IAudioIn() = default;
 };
 
 AudInU::AudInU() : ServiceFramework("audin:u") {
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 21f5e64c7..39acb7b23 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -150,7 +150,6 @@ private:
     void GetReleasedAudioOutBufferImpl(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_Audio, "called {}", ctx.Description());
 
-        IPC::RequestParser rp{ctx};
         const u64 max_count{ctx.GetWriteBufferSize() / sizeof(u64)};
         const auto released_buffers{audio_core.GetTagsAndReleaseBuffers(stream, max_count)};
 
@@ -194,12 +193,9 @@ private:
 void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_Audio, "called");
 
-    IPC::RequestParser rp{ctx};
-
     ctx.WriteBuffer(DefaultDevice);
 
     IPC::ResponseBuilder rb{ctx, 3};
-
     rb.Push(RESULT_SUCCESS);
     rb.Push<u32>(1); // Amount of audio devices
 }
diff --git a/src/core/hle/service/audio/audrec_u.cpp b/src/core/hle/service/audio/audrec_u.cpp
index 6956a2e64..1a5aed9ed 100644
--- a/src/core/hle/service/audio/audrec_u.cpp
+++ b/src/core/hle/service/audio/audrec_u.cpp
@@ -2,9 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "common/logging/log.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/service/audio/audrec_u.h"
 
 namespace Service::Audio {
@@ -30,7 +27,6 @@ public:
 
         RegisterHandlers(functions);
     }
-    ~IFinalOutputRecorder() = default;
 };
 
 AudRecU::AudRecU() : ServiceFramework("audrec:u") {
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index c9de10a24..1dde6edb7 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -10,6 +10,7 @@
 #include "common/alignment.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
+#include "common/string_util.h"
 #include "core/core.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/hle_ipc.h"
@@ -184,7 +185,6 @@ public:
 private:
     void ListAudioDeviceName(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_Audio, "(STUBBED) called");
-        IPC::RequestParser rp{ctx};
 
         constexpr std::array<char, 15> audio_interface{{"AudioInterface"}};
         ctx.WriteBuffer(audio_interface);
@@ -195,13 +195,13 @@ private:
     }
 
     void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
-        LOG_WARNING(Service_Audio, "(STUBBED) called");
-
         IPC::RequestParser rp{ctx};
-        f32 volume = static_cast<f32>(rp.Pop<u32>());
+        const f32 volume = rp.Pop<f32>();
 
-        auto file_buffer = ctx.ReadBuffer();
-        auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
+        const auto device_name_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(device_name_buffer);
+
+        LOG_WARNING(Service_Audio, "(STUBBED) called. name={}, volume={}", name, volume);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
@@ -209,7 +209,6 @@ private:
 
     void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_Audio, "(STUBBED) called");
-        IPC::RequestParser rp{ctx};
 
         constexpr std::array<char, 12> audio_interface{{"AudioDevice"}};
         ctx.WriteBuffer(audio_interface);
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 377e12cfa..cb4a1160d 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include <opus.h>
+#include <opus_multistream.h>
 
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -18,12 +19,12 @@
 namespace Service::Audio {
 namespace {
 struct OpusDeleter {
-    void operator()(void* ptr) const {
-        operator delete(ptr);
+    void operator()(OpusMSDecoder* ptr) const {
+        opus_multistream_decoder_destroy(ptr);
     }
 };
 
-using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
+using OpusDecoderPtr = std::unique_ptr<OpusMSDecoder, OpusDeleter>;
 
 struct OpusPacketHeader {
     // Packet size in bytes.
@@ -33,7 +34,7 @@ struct OpusPacketHeader {
 };
 static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
 
-class OpusDecoderStateBase {
+class OpusDecoderState {
 public:
     /// Describes extra behavior that may be asked of the decoding context.
     enum class ExtraBehavior {
@@ -49,22 +50,13 @@ public:
         Enabled,
     };
 
-    virtual ~OpusDecoderStateBase() = default;
-
-    // Decodes interleaved Opus packets. Optionally allows reporting time taken to
-    // perform the decoding, as well as any relevant extra behavior.
-    virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
-                                   ExtraBehavior extra_behavior) = 0;
-};
-
-// Represents the decoder state for a non-multistream decoder.
-class OpusDecoderState final : public OpusDecoderStateBase {
-public:
     explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
         : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
 
+    // Decodes interleaved Opus packets. Optionally allows reporting time taken to
+    // perform the decoding, as well as any relevant extra behavior.
     void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
-                           ExtraBehavior extra_behavior) override {
+                           ExtraBehavior extra_behavior) {
         if (perf_time == PerfTime::Disabled) {
             DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
         } else {
@@ -135,7 +127,7 @@ private:
 
         const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
         const auto out_sample_count =
-            opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
+            opus_multistream_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
         if (out_sample_count < 0) {
             LOG_ERROR(Audio,
                       "Incorrect sample count received from opus_decode, "
@@ -158,7 +150,7 @@ private:
     void ResetDecoderContext() {
         ASSERT(decoder != nullptr);
 
-        opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
+        opus_multistream_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
     }
 
     OpusDecoderPtr decoder;
@@ -168,7 +160,7 @@ private:
 
 class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
 public:
-    explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
+    explicit IHardwareOpusDecoderManager(OpusDecoderState decoder_state)
         : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
         // clang-format off
         static const FunctionInfo functions[] = {
@@ -190,35 +182,51 @@ private:
     void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Audio, "called");
 
-        decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
-                                         OpusDecoderStateBase::ExtraBehavior::None);
+        decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Disabled,
+                                        OpusDecoderState::ExtraBehavior::None);
     }
 
     void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Audio, "called");
 
-        decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
-                                         OpusDecoderStateBase::ExtraBehavior::None);
+        decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled,
+                                        OpusDecoderState::ExtraBehavior::None);
     }
 
     void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Audio, "called");
 
         IPC::RequestParser rp{ctx};
-        const auto extra_behavior = rp.Pop<bool>()
-                                        ? OpusDecoderStateBase::ExtraBehavior::ResetContext
-                                        : OpusDecoderStateBase::ExtraBehavior::None;
+        const auto extra_behavior = rp.Pop<bool>() ? OpusDecoderState::ExtraBehavior::ResetContext
+                                                   : OpusDecoderState::ExtraBehavior::None;
 
-        decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
-                                         extra_behavior);
+        decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled, extra_behavior);
     }
 
-    std::unique_ptr<OpusDecoderStateBase> decoder_state;
+    OpusDecoderState decoder_state;
 };
 
 std::size_t WorkerBufferSize(u32 channel_count) {
     ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
-    return opus_decoder_get_size(static_cast<int>(channel_count));
+    constexpr int num_streams = 1;
+    const int num_stereo_streams = channel_count == 2 ? 1 : 0;
+    return opus_multistream_decoder_get_size(num_streams, num_stereo_streams);
+}
+
+// Creates the mapping table that maps the input channels to the particular
+// output channels. In the stereo case, we map the left and right input channels
+// to the left and right output channels respectively.
+//
+// However, in the monophonic case, we only map the one available channel
+// to the sole output channel. We specify 255 for the would-be right channel
+// as this is a special value defined by Opus to indicate to the decoder to
+// ignore that channel.
+std::array<u8, 2> CreateMappingTable(u32 channel_count) {
+    if (channel_count == 2) {
+        return {{0, 1}};
+    }
+
+    return {{0, 255}};
 }
 } // Anonymous namespace
 
@@ -259,9 +267,15 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
     const std::size_t worker_sz = WorkerBufferSize(channel_count);
     ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
 
-    OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
-    if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
-        LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
+    const int num_stereo_streams = channel_count == 2 ? 1 : 0;
+    const auto mapping_table = CreateMappingTable(channel_count);
+
+    int error = 0;
+    OpusDecoderPtr decoder{
+        opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
+                                        num_stereo_streams, mapping_table.data(), &error)};
+    if (error != OPUS_OK || decoder == nullptr) {
+        LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
         IPC::ResponseBuilder rb{ctx, 2};
         // TODO(ogniK): Use correct error code
         rb.Push(ResultCode(-1));
@@ -271,7 +285,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
     IPC::ResponseBuilder rb{ctx, 2, 0, 1};
     rb.Push(RESULT_SUCCESS);
     rb.PushIpcInterface<IHardwareOpusDecoderManager>(
-        std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
+        OpusDecoderState{std::move(decoder), sample_rate, channel_count});
 }
 
 HwOpus::HwOpus() : ServiceFramework("hwopus") {
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index 770590d0b..2c229bcad 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -25,21 +25,34 @@ Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
 Module::Interface::~Interface() = default;
 
 struct FatalInfo {
-    std::array<u64_le, 31> registers{}; // TODO(ogniK): See if this actually is registers or
-                                        // not(find a game which has non zero valeus)
-    u64_le unk0{};
-    u64_le unk1{};
-    u64_le unk2{};
-    u64_le unk3{};
-    u64_le unk4{};
-    u64_le unk5{};
-    u64_le unk6{};
+    enum class Architecture : s32 {
+        AArch64,
+        AArch32,
+    };
+
+    const char* ArchAsString() const {
+        return arch == Architecture::AArch64 ? "AArch64" : "AArch32";
+    }
+
+    std::array<u64_le, 31> registers{};
+    u64_le sp{};
+    u64_le pc{};
+    u64_le pstate{};
+    u64_le afsr0{};
+    u64_le afsr1{};
+    u64_le esr{};
+    u64_le far{};
 
     std::array<u64_le, 32> backtrace{};
-    u64_le unk7{};
-    u64_le unk8{};
+    u64_le program_entry_point{};
+
+    // Bit flags that indicate which registers have been set with values
+    // for this context. The service itself uses these to determine which
+    // registers to specifically print out.
+    u64_le set_flags{};
+
     u32_le backtrace_size{};
-    u32_le unk9{};
+    Architecture arch{};
     u32_le unk10{}; // TODO(ogniK): Is this even used or is it just padding?
 };
 static_assert(sizeof(FatalInfo) == 0x250, "FatalInfo is an invalid size");
@@ -52,36 +65,36 @@ enum class FatalType : u32 {
 
 static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) {
     const auto title_id = Core::CurrentProcess()->GetTitleID();
-    std::string crash_report =
-        fmt::format("Yuzu {}-{} crash report\n"
-                    "Title ID:                        {:016x}\n"
-                    "Result:                          0x{:X} ({:04}-{:04d})\n"
-                    "\n",
-                    Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
-                    2000 + static_cast<u32>(error_code.module.Value()),
-                    static_cast<u32>(error_code.description.Value()), info.unk8, info.unk7);
+    // set_flags and program_entry_point are u64_le: zero-pad both to 16 hex digits.
+    std::string crash_report = fmt::format(
+        "Yuzu {}-{} crash report\n"
+        "Title ID:                        {:016x}\n"
+        "Result:                          0x{:X} ({:04}-{:04d})\n"
+        "Set flags:                       0x{:016X}\n"
+        "Program entry point:             0x{:016X}\n\n",
+        Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
+        2000 + static_cast<u32>(error_code.module.Value()),
+        static_cast<u32>(error_code.description.Value()), info.set_flags, info.program_entry_point);
     if (info.backtrace_size != 0x0) {
         crash_report += "Registers:\n";
-        // TODO(ogniK): This is just a guess, find a game which actually has non zero values
         for (size_t i = 0; i < info.registers.size(); i++) {
             crash_report +=
                 fmt::format("    X[{:02d}]:                       {:016x}\n", i, info.registers[i]);
         }
-        crash_report += fmt::format("    Unknown 0:                   {:016x}\n", info.unk0);
-        crash_report += fmt::format("    Unknown 1:                   {:016x}\n", info.unk1);
-        crash_report += fmt::format("    Unknown 2:                   {:016x}\n", info.unk2);
-        crash_report += fmt::format("    Unknown 3:                   {:016x}\n", info.unk3);
-        crash_report += fmt::format("    Unknown 4:                   {:016x}\n", info.unk4);
-        crash_report += fmt::format("    Unknown 5:                   {:016x}\n", info.unk5);
-        crash_report += fmt::format("    Unknown 6:                   {:016x}\n", info.unk6);
+        crash_report += fmt::format("    SP:                          {:016x}\n", info.sp);
+        crash_report += fmt::format("    PC:                          {:016x}\n", info.pc);
+        crash_report += fmt::format("    PSTATE:                      {:016x}\n", info.pstate);
+        crash_report += fmt::format("    AFSR0:                       {:016x}\n", info.afsr0);
+        crash_report += fmt::format("    AFSR1:                       {:016x}\n", info.afsr1);
+        crash_report += fmt::format("    ESR:                         {:016x}\n", info.esr);
+        crash_report += fmt::format("    FAR:                         {:016x}\n", info.far);
         crash_report += "\nBacktrace:\n";
         for (size_t i = 0; i < info.backtrace_size; i++) {
             crash_report +=
                 fmt::format("    Backtrace[{:02d}]:               {:016x}\n", i, info.backtrace[i]);
         }
-        crash_report += fmt::format("\nUnknown 7:                       0x{:016x}\n", info.unk7);
-        crash_report += fmt::format("Unknown 8:                       0x{:016x}\n", info.unk8);
-        crash_report += fmt::format("Unknown 9:                       0x{:016x}\n", info.unk9);
+
+        crash_report += fmt::format("Architecture:                    {}\n", info.ArchAsString());
         crash_report += fmt::format("Unknown 10:                      0x{:016x}\n", info.unk10);
     }
 
@@ -125,13 +138,13 @@ static void ThrowFatalError(ResultCode error_code, FatalType fatal_type, const F
     case FatalType::ErrorReport:
         GenerateErrorReport(error_code, info);
         break;
-    };
+    }
 }
 
 void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp{ctx};
-    auto error_code = rp.Pop<ResultCode>();
+    const auto error_code = rp.Pop<ResultCode>();
 
     ThrowFatalError(error_code, FatalType::ErrorScreen, {});
     IPC::ResponseBuilder rb{ctx, 2};
@@ -141,8 +154,8 @@ void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
 void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp(ctx);
-    auto error_code = rp.Pop<ResultCode>();
-    auto fatal_type = rp.PopEnum<FatalType>();
+    const auto error_code = rp.Pop<ResultCode>();
+    const auto fatal_type = rp.PopEnum<FatalType>();
 
     ThrowFatalError(error_code, fatal_type, {}); // No info is passed with ThrowFatalWithPolicy
     IPC::ResponseBuilder rb{ctx, 2};
@@ -152,9 +165,9 @@ void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
 void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) {
     LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp(ctx);
-    auto error_code = rp.Pop<ResultCode>();
-    auto fatal_type = rp.PopEnum<FatalType>();
-    auto fatal_info = ctx.ReadBuffer();
+    const auto error_code = rp.Pop<ResultCode>();
+    const auto fatal_type = rp.PopEnum<FatalType>();
+    const auto fatal_info = ctx.ReadBuffer();
     FatalInfo info{};
 
     ASSERT_MSG(fatal_info.size() == sizeof(FatalInfo), "Invalid fatal info buffer size!");
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index c6da2df43..4c2b371c3 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -197,13 +197,16 @@ ResultCode VfsDirectoryServiceWrapper::RenameDirectory(const std::string& src_pa
 
 ResultVal<FileSys::VirtualFile> VfsDirectoryServiceWrapper::OpenFile(const std::string& path_,
                                                                      FileSys::Mode mode) const {
-    std::string path(FileUtil::SanitizePath(path_));
-    auto npath = path;
-    while (npath.size() > 0 && (npath[0] == '/' || npath[0] == '\\'))
-        npath = npath.substr(1);
+    const std::string path(FileUtil::SanitizePath(path_));
+    std::string_view npath = path;
+    while (!npath.empty() && (npath[0] == '/' || npath[0] == '\\')) {
+        npath.remove_prefix(1);
+    }
+
     auto file = backing->GetFileRelative(npath);
-    if (file == nullptr)
+    if (file == nullptr) {
         return FileSys::ERROR_PATH_NOT_FOUND;
+    }
 
     if (mode == FileSys::Mode::Append) {
         return MakeResult<FileSys::VirtualFile>(
@@ -319,15 +322,15 @@ ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId stora
 }
 
 ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space,
-                                            FileSys::SaveDataDescriptor save_struct) {
+                                            const FileSys::SaveDataDescriptor& descriptor) {
     LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}",
-              static_cast<u8>(space), save_struct.DebugInfo());
+              static_cast<u8>(space), descriptor.DebugInfo());
 
     if (save_data_factory == nullptr) {
         return FileSys::ERROR_ENTITY_NOT_FOUND;
     }
 
-    return save_data_factory->Open(space, save_struct);
+    return save_data_factory->Open(space, descriptor);
 }
 
 ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space) {
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index 6fd5e7b23..7cfc0d902 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -46,7 +46,7 @@ ResultVal<FileSys::VirtualFile> OpenRomFSCurrentProcess();
 ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId storage_id,
                                           FileSys::ContentRecordType type);
 ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space,
-                                            FileSys::SaveDataDescriptor save_struct);
+                                            const FileSys::SaveDataDescriptor& descriptor);
 ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space);
 ResultVal<FileSys::VirtualDir> OpenSDMC();
 
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index 54959edd8..657baddb8 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -315,61 +315,53 @@ public:
     void CreateFile(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
 
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        u64 mode = rp.Pop<u64>();
-        u32 size = rp.Pop<u32>();
+        const u64 mode = rp.Pop<u64>();
+        const u32 size = rp.Pop<u32>();
 
-        LOG_DEBUG(Service_FS, "called file {} mode 0x{:X} size 0x{:08X}", name, mode, size);
+        LOG_DEBUG(Service_FS, "called. file={}, mode=0x{:X}, size=0x{:08X}", name, mode, size);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.CreateFile(name, size));
     }
 
     void DeleteFile(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called file {}", name);
+        LOG_DEBUG(Service_FS, "called. file={}", name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.DeleteFile(name));
     }
 
     void CreateDirectory(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called directory {}", name);
+        LOG_DEBUG(Service_FS, "called. directory={}", name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.CreateDirectory(name));
     }
 
     void DeleteDirectory(Kernel::HLERequestContext& ctx) {
-        const IPC::RequestParser rp{ctx};
-
         const auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called directory {}", name);
+        LOG_DEBUG(Service_FS, "called. directory={}", name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.DeleteDirectory(name));
     }
 
     void DeleteDirectoryRecursively(Kernel::HLERequestContext& ctx) {
-        const IPC::RequestParser rp{ctx};
-
         const auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called directory {}", name);
+        LOG_DEBUG(Service_FS, "called. directory={}", name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.DeleteDirectoryRecursively(name));
@@ -386,18 +378,16 @@ public:
     }
 
     void RenameFile(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
         std::vector<u8> buffer;
         buffer.resize(ctx.BufferDescriptorX()[0].Size());
         Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size());
-        std::string src_name = Common::StringFromBuffer(buffer);
+        const std::string src_name = Common::StringFromBuffer(buffer);
 
         buffer.resize(ctx.BufferDescriptorX()[1].Size());
         Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size());
-        std::string dst_name = Common::StringFromBuffer(buffer);
+        const std::string dst_name = Common::StringFromBuffer(buffer);
 
-        LOG_DEBUG(Service_FS, "called file '{}' to file '{}'", src_name, dst_name);
+        LOG_DEBUG(Service_FS, "called. file '{}' to file '{}'", src_name, dst_name);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(backend.RenameFile(src_name, dst_name));
@@ -406,12 +396,12 @@ public:
     void OpenFile(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
 
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
+        const auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
 
-        LOG_DEBUG(Service_FS, "called file {} mode {}", name, static_cast<u32>(mode));
+        LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, static_cast<u32>(mode));
 
         auto result = backend.OpenFile(name, mode);
         if (result.Failed()) {
@@ -430,13 +420,13 @@ public:
     void OpenDirectory(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
 
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
         // TODO(Subv): Implement this filter.
-        u32 filter_flags = rp.Pop<u32>();
+        const u32 filter_flags = rp.Pop<u32>();
 
-        LOG_DEBUG(Service_FS, "called directory {} filter {}", name, filter_flags);
+        LOG_DEBUG(Service_FS, "called. directory={}, filter={}", name, filter_flags);
 
         auto result = backend.OpenDirectory(name);
         if (result.Failed()) {
@@ -453,12 +443,10 @@ public:
     }
 
     void GetEntryType(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
-        auto file_buffer = ctx.ReadBuffer();
-        std::string name = Common::StringFromBuffer(file_buffer);
+        const auto file_buffer = ctx.ReadBuffer();
+        const std::string name = Common::StringFromBuffer(file_buffer);
 
-        LOG_DEBUG(Service_FS, "called file {}", name);
+        LOG_DEBUG(Service_FS, "called. file={}", name);
 
         auto result = backend.GetEntryType(name);
         if (result.Failed()) {
@@ -616,7 +604,9 @@ private:
         u64_le save_id;
         u64_le title_id;
         u64_le save_image_size;
-        INSERT_PADDING_BYTES(0x28);
+        u16_le index;
+        FileSys::SaveDataRank rank;
+        INSERT_PADDING_BYTES(0x25);
     };
     static_assert(sizeof(SaveDataInfo) == 0x60, "SaveDataInfo has incorrect size.");
 
@@ -733,7 +723,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
 FSP_SRV::~FSP_SRV() = default;
 
 void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_FS, "(STUBBED) called");
+    IPC::RequestParser rp{ctx};
+    current_process_id = rp.Pop<u64>();
+
+    LOG_DEBUG(Service_FS, "called. current_process_id=0x{:016X}", current_process_id);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
@@ -776,16 +769,17 @@ void FSP_SRV::CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
 }
 
 void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
+    LOG_INFO(Service_FS, "called.");
 
-    auto space_id = rp.PopRaw<FileSys::SaveDataSpaceId>();
-    auto unk = rp.Pop<u32>();
-    LOG_INFO(Service_FS, "called with unknown={:08X}", unk);
-
-    auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>();
+    struct Parameters {
+        FileSys::SaveDataSpaceId save_data_space_id;
+        FileSys::SaveDataDescriptor descriptor;
+    };
 
-    auto dir = OpenSaveData(space_id, save_struct);
+    IPC::RequestParser rp{ctx};
+    const auto parameters = rp.PopRaw<Parameters>();
 
+    auto dir = OpenSaveData(parameters.save_data_space_id, parameters.descriptor);
     if (dir.Failed()) {
         IPC::ResponseBuilder rb{ctx, 2, 0, 0};
         rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND);
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index 3a5f4e200..d7572ba7a 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -32,6 +32,7 @@ private:
     void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
 
     FileSys::VirtualFile romfs;
+    u64 current_process_id = 0;
 };
 
 } // namespace Service::FileSystem
diff --git a/src/core/hle/service/hid/controllers/debug_pad.h b/src/core/hle/service/hid/controllers/debug_pad.h
index 929035034..e584b92ec 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.h
+++ b/src/core/hle/service/hid/controllers/debug_pad.h
@@ -41,20 +41,20 @@ private:
     struct PadState {
         union {
             u32_le raw{};
-            BitField<0, 1, u32_le> a;
-            BitField<1, 1, u32_le> b;
-            BitField<2, 1, u32_le> x;
-            BitField<3, 1, u32_le> y;
-            BitField<4, 1, u32_le> l;
-            BitField<5, 1, u32_le> r;
-            BitField<6, 1, u32_le> zl;
-            BitField<7, 1, u32_le> zr;
-            BitField<8, 1, u32_le> plus;
-            BitField<9, 1, u32_le> minus;
-            BitField<10, 1, u32_le> d_left;
-            BitField<11, 1, u32_le> d_up;
-            BitField<12, 1, u32_le> d_right;
-            BitField<13, 1, u32_le> d_down;
+            BitField<0, 1, u32> a;
+            BitField<1, 1, u32> b;
+            BitField<2, 1, u32> x;
+            BitField<3, 1, u32> y;
+            BitField<4, 1, u32> l;
+            BitField<5, 1, u32> r;
+            BitField<6, 1, u32> zl;
+            BitField<7, 1, u32> zr;
+            BitField<8, 1, u32> plus;
+            BitField<9, 1, u32> minus;
+            BitField<10, 1, u32> d_left;
+            BitField<11, 1, u32> d_up;
+            BitField<12, 1, u32> d_right;
+            BitField<13, 1, u32> d_down;
         };
     };
     static_assert(sizeof(PadState) == 0x4, "PadState is an invalid size");
@@ -62,7 +62,7 @@ private:
     struct Attributes {
         union {
             u32_le raw{};
-            BitField<0, 1, u32_le> connected;
+            BitField<0, 1, u32> connected;
         };
     };
     static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 18c7a94e6..4ff50b3cd 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -39,13 +39,13 @@ public:
         union {
             u32_le raw{};
 
-            BitField<0, 1, u32_le> pro_controller;
-            BitField<1, 1, u32_le> handheld;
-            BitField<2, 1, u32_le> joycon_dual;
-            BitField<3, 1, u32_le> joycon_left;
-            BitField<4, 1, u32_le> joycon_right;
+            BitField<0, 1, u32> pro_controller;
+            BitField<1, 1, u32> handheld;
+            BitField<2, 1, u32> joycon_dual;
+            BitField<3, 1, u32> joycon_left;
+            BitField<4, 1, u32> joycon_right;
 
-            BitField<6, 1, u32_le> pokeball; // TODO(ogniK): Confirm when possible
+            BitField<6, 1, u32> pokeball; // TODO(ogniK): Confirm when possible
         };
     };
     static_assert(sizeof(NPadType) == 4, "NPadType is an invalid size");
@@ -150,43 +150,43 @@ private:
         union {
             u64_le raw{};
             // Button states
-            BitField<0, 1, u64_le> a;
-            BitField<1, 1, u64_le> b;
-            BitField<2, 1, u64_le> x;
-            BitField<3, 1, u64_le> y;
-            BitField<4, 1, u64_le> l_stick;
-            BitField<5, 1, u64_le> r_stick;
-            BitField<6, 1, u64_le> l;
-            BitField<7, 1, u64_le> r;
-            BitField<8, 1, u64_le> zl;
-            BitField<9, 1, u64_le> zr;
-            BitField<10, 1, u64_le> plus;
-            BitField<11, 1, u64_le> minus;
+            BitField<0, 1, u64> a;
+            BitField<1, 1, u64> b;
+            BitField<2, 1, u64> x;
+            BitField<3, 1, u64> y;
+            BitField<4, 1, u64> l_stick;
+            BitField<5, 1, u64> r_stick;
+            BitField<6, 1, u64> l;
+            BitField<7, 1, u64> r;
+            BitField<8, 1, u64> zl;
+            BitField<9, 1, u64> zr;
+            BitField<10, 1, u64> plus;
+            BitField<11, 1, u64> minus;
 
             // D-Pad
-            BitField<12, 1, u64_le> d_left;
-            BitField<13, 1, u64_le> d_up;
-            BitField<14, 1, u64_le> d_right;
-            BitField<15, 1, u64_le> d_down;
+            BitField<12, 1, u64> d_left;
+            BitField<13, 1, u64> d_up;
+            BitField<14, 1, u64> d_right;
+            BitField<15, 1, u64> d_down;
 
             // Left JoyStick
-            BitField<16, 1, u64_le> l_stick_left;
-            BitField<17, 1, u64_le> l_stick_up;
-            BitField<18, 1, u64_le> l_stick_right;
-            BitField<19, 1, u64_le> l_stick_down;
+            BitField<16, 1, u64> l_stick_left;
+            BitField<17, 1, u64> l_stick_up;
+            BitField<18, 1, u64> l_stick_right;
+            BitField<19, 1, u64> l_stick_down;
 
             // Right JoyStick
-            BitField<20, 1, u64_le> r_stick_left;
-            BitField<21, 1, u64_le> r_stick_up;
-            BitField<22, 1, u64_le> r_stick_right;
-            BitField<23, 1, u64_le> r_stick_down;
+            BitField<20, 1, u64> r_stick_left;
+            BitField<21, 1, u64> r_stick_up;
+            BitField<22, 1, u64> r_stick_right;
+            BitField<23, 1, u64> r_stick_down;
 
             // Not always active?
-            BitField<24, 1, u64_le> left_sl;
-            BitField<25, 1, u64_le> left_sr;
+            BitField<24, 1, u64> left_sl;
+            BitField<25, 1, u64> left_sr;
 
-            BitField<26, 1, u64_le> right_sl;
-            BitField<27, 1, u64_le> right_sr;
+            BitField<26, 1, u64> right_sl;
+            BitField<27, 1, u64> right_sr;
         };
     };
     static_assert(sizeof(ControllerPadState) == 8, "ControllerPadState is an invalid size");
@@ -200,12 +200,12 @@ private:
     struct ConnectionState {
         union {
             u32_le raw{};
-            BitField<0, 1, u32_le> IsConnected;
-            BitField<1, 1, u32_le> IsWired;
-            BitField<2, 1, u32_le> IsLeftJoyConnected;
-            BitField<3, 1, u32_le> IsLeftJoyWired;
-            BitField<4, 1, u32_le> IsRightJoyConnected;
-            BitField<5, 1, u32_le> IsRightJoyWired;
+            BitField<0, 1, u32> IsConnected;
+            BitField<1, 1, u32> IsWired;
+            BitField<2, 1, u32> IsLeftJoyConnected;
+            BitField<3, 1, u32> IsLeftJoyWired;
+            BitField<4, 1, u32> IsRightJoyConnected;
+            BitField<5, 1, u32> IsRightJoyWired;
         };
     };
     static_assert(sizeof(ConnectionState) == 4, "ConnectionState is an invalid size");
@@ -240,23 +240,23 @@ private:
     struct NPadProperties {
         union {
             s64_le raw{};
-            BitField<11, 1, s64_le> is_vertical;
-            BitField<12, 1, s64_le> is_horizontal;
-            BitField<13, 1, s64_le> use_plus;
-            BitField<14, 1, s64_le> use_minus;
+            BitField<11, 1, s64> is_vertical;
+            BitField<12, 1, s64> is_horizontal;
+            BitField<13, 1, s64> use_plus;
+            BitField<14, 1, s64> use_minus;
         };
     };
 
     struct NPadDevice {
         union {
             u32_le raw{};
-            BitField<0, 1, s32_le> pro_controller;
-            BitField<1, 1, s32_le> handheld;
-            BitField<2, 1, s32_le> handheld_left;
-            BitField<3, 1, s32_le> handheld_right;
-            BitField<4, 1, s32_le> joycon_left;
-            BitField<5, 1, s32_le> joycon_right;
-            BitField<6, 1, s32_le> pokeball;
+            BitField<0, 1, s32> pro_controller;
+            BitField<1, 1, s32> handheld;
+            BitField<2, 1, s32> handheld_left;
+            BitField<3, 1, s32> handheld_right;
+            BitField<4, 1, s32> joycon_left;
+            BitField<5, 1, s32> joycon_right;
+            BitField<6, 1, s32> pokeball;
         };
     };
 
diff --git a/src/core/hle/service/hid/controllers/touchscreen.h b/src/core/hle/service/hid/controllers/touchscreen.h
index 012b6e0dd..76fc340e9 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -33,8 +33,8 @@ private:
     struct Attributes {
         union {
             u32 raw{};
-            BitField<0, 1, u32_le> start_touch;
-            BitField<1, 1, u32_le> end_touch;
+            BitField<0, 1, u32> start_touch;
+            BitField<1, 1, u32> end_touch;
         };
     };
     static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 8a6de83a2..63b55758b 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -36,9 +36,9 @@ namespace Service::HID {
 
 // Updating period for each HID device.
 // TODO(ogniK): Find actual polling rate of hid
-constexpr u64 pad_update_ticks = Core::Timing::BASE_CLOCK_RATE / 66;
-constexpr u64 accelerometer_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
-constexpr u64 gyroscope_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
+constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
+constexpr s64 accelerometer_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+constexpr s64 gyroscope_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
 constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
 
 IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -75,7 +75,7 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
     // Register update callbacks
     auto& core_timing = Core::System::GetInstance().CoreTiming();
     pad_update_event =
-        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
+        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
             UpdateControllers(userdata, cycles_late);
         });
 
@@ -106,7 +106,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
     rb.PushCopyObjects(shared_mem);
 }
 
-void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
+void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
     auto& core_timing = Core::System::GetInstance().CoreTiming();
 
     const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 7cc58db4c..d3660cad2 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -4,6 +4,9 @@
 
 #pragma once
 
+#include "core/hle/service/hid/controllers/controller_base.h"
+#include "core/hle/service/service.h"
+
 #include "controllers/controller_base.h"
 #include "core/hle/service/service.h"
 
@@ -62,7 +65,7 @@ private:
     }
 
     void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
-    void UpdateControllers(u64 userdata, int cycles_late);
+    void UpdateControllers(u64 userdata, s64 cycles_late);
 
     Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
 
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9df7ac50f..d65693fc7 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -319,15 +319,14 @@ public:
         }
 
         ASSERT(vm_manager
-                   .MirrorMemory(*map_address, nro_addr, nro_size,
-                                 Kernel::MemoryState::ModuleCodeStatic)
+                   .MirrorMemory(*map_address, nro_addr, nro_size, Kernel::MemoryState::ModuleCode)
                    .IsSuccess());
         ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess());
 
         if (bss_size > 0) {
             ASSERT(vm_manager
                        .MirrorMemory(*map_address + nro_size, bss_addr, bss_size,
-                                     Kernel::MemoryState::ModuleCodeStatic)
+                                     Kernel::MemoryState::ModuleCode)
                        .IsSuccess());
             ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess());
         }
@@ -388,8 +387,7 @@ public:
         const auto& nro_size = iter->second.size;
 
         ASSERT(vm_manager
-                   .MirrorMemory(heap_addr, mapped_addr, nro_size,
-                                 Kernel::MemoryState::ModuleCodeStatic)
+                   .MirrorMemory(heap_addr, mapped_addr, nro_size, Kernel::MemoryState::ModuleCode)
                    .IsSuccess());
         ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess());
 
diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp
index 1f462e087..2a61593e2 100644
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -42,7 +42,7 @@ private:
         union {
             BitField<0, 16, Flags> flags;
             BitField<16, 8, Severity> severity;
-            BitField<24, 8, u32_le> verbosity;
+            BitField<24, 8, u32> verbosity;
         };
         u32_le payload_size;
 
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index 5c62d42ba..ca88bf97f 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -150,7 +150,7 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.PushRaw<u8>(Settings::values.enable_nfc);
+        rb.PushRaw<u8>(true);
     }
 
     void GetStateOld(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 1c4482e47..c6babdd4d 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -335,7 +335,7 @@ void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
 }
 
 bool Module::Interface::LoadAmiibo(const std::vector<u8>& buffer) {
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     if (buffer.size() < sizeof(AmiiboFile)) {
         return false;
     }
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 0f02a1a18..4f6042b00 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -19,11 +19,11 @@ public:
     virtual ~nvdevice() = default;
     union Ioctl {
         u32_le raw;
-        BitField<0, 8, u32_le> cmd;
-        BitField<8, 8, u32_le> group;
-        BitField<16, 14, u32_le> length;
-        BitField<30, 1, u32_le> is_in;
-        BitField<31, 1, u32_le> is_out;
+        BitField<0, 8, u32> cmd;
+        BitField<8, 8, u32> group;
+        BitField<16, 14, u32> length;
+        BitField<30, 1, u32> is_in;
+        BitField<31, 1, u32> is_out;
     };
 
     /**
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index ace71169f..12f3ef825 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -18,7 +18,7 @@ class nvmap;
 class nvdisp_disp0 final : public nvdevice {
 public:
     explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev);
-    ~nvdisp_disp0();
+    ~nvdisp_disp0() override;
 
     u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index a34b9e753..af62d33d2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
+#include "core/memory.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
@@ -88,7 +89,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
     for (const auto& entry : entries) {
         LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
                     entry.offset, entry.nvmap_handle, entry.pages);
-        Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10;
+        GPUVAddr offset = static_cast<GPUVAddr>(entry.offset) << 0x10;
         auto object = nvmap_dev->GetObject(entry.nvmap_handle);
         if (!object) {
             LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
@@ -101,7 +102,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
         u64 size = static_cast<u64>(entry.pages) << 0x10;
         ASSERT(size <= object->size);
 
-        Tegra::GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
+        GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
         ASSERT(returned == offset);
     }
     std::memcpy(output.data(), entries.data(), output.size());
@@ -172,16 +173,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
         return 0;
     }
 
-    auto& system_instance = Core::System::GetInstance();
-
-    // Remove this memory region from the rasterizer cache.
-    auto& gpu = system_instance.GPU();
-    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
-    ASSERT(cpu_addr);
-    gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
-
-    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
-
+    params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset,
+                                                                                  itr->second.size);
     buffer_mappings.erase(itr->second.offset);
 
     std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index fe311b069..5b4889910 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -17,7 +17,7 @@ namespace Service::Nvidia {
 class NVDRV final : public ServiceFramework<NVDRV> {
 public:
     NVDRV(std::shared_ptr<Module> nvdrv, const char* name);
-    ~NVDRV();
+    ~NVDRV() override;
 
 private:
     void Open(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/nvdrv/nvmemp.h b/src/core/hle/service/nvdrv/nvmemp.h
index 5a4dfc1f9..6eafb1346 100644
--- a/src/core/hle/service/nvdrv/nvmemp.h
+++ b/src/core/hle/service/nvdrv/nvmemp.h
@@ -11,7 +11,7 @@ namespace Service::Nvidia {
 class NVMEMP final : public ServiceFramework<NVMEMP> {
 public:
     NVMEMP();
-    ~NVMEMP();
+    ~NVMEMP() override;
 
 private:
     void Cmd0(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index fc496b654..c7f5bbf28 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -26,7 +26,7 @@
 namespace Service::NVFlinger {
 
 constexpr std::size_t SCREEN_REFRESH_RATE = 60;
-constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
 
 NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
     displays.emplace_back(0, "Default");
@@ -37,7 +37,7 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t
 
     // Schedule the screen composition events
     composition_event =
-        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
+        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
             Compose();
             this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
         });
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 830790269..abbfe5524 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -90,7 +90,7 @@ private:
                            Kernel::HLERequestContext& ctx);
 
     ServiceFrameworkBase(const char* service_name, u32 max_sessions, InvokerFn* handler_invoker);
-    ~ServiceFrameworkBase();
+    ~ServiceFrameworkBase() override;
 
     void RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n);
     void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info);
diff --git a/src/core/hle/service/set/set_cal.h b/src/core/hle/service/set/set_cal.h
index 583036eac..a0677e815 100644
--- a/src/core/hle/service/set/set_cal.h
+++ b/src/core/hle/service/set/set_cal.h
@@ -11,7 +11,7 @@ namespace Service::Set {
 class SET_CAL final : public ServiceFramework<SET_CAL> {
 public:
     explicit SET_CAL();
-    ~SET_CAL();
+    ~SET_CAL() override;
 };
 
 } // namespace Service::Set
diff --git a/src/core/hle/service/set/set_sys.cpp b/src/core/hle/service/set/set_sys.cpp
index c9b4da5b0..ecee554bf 100644
--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -2,13 +2,88 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/file_sys/errors.h"
+#include "core/file_sys/system_archive/system_version.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
+#include "core/hle/service/filesystem/filesystem.h"
 #include "core/hle/service/set/set_sys.h"
 
 namespace Service::Set {
 
+namespace {
+constexpr u64 SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET = 0x05;
+
+enum class GetFirmwareVersionType {
+    Version1,
+    Version2,
+};
+
+void GetFirmwareVersionImpl(Kernel::HLERequestContext& ctx, GetFirmwareVersionType type) {
+    LOG_WARNING(Service_SET, "called - Using hardcoded firmware version '{}'",
+                FileSys::SystemArchive::GetLongDisplayVersion());
+
+    ASSERT_MSG(ctx.GetWriteBufferSize() == 0x100,
+               "FirmwareVersion output buffer must be 0x100 bytes in size!");
+
+    // The normal procedure would be to check for the real system archive and synthesize one only
+    // if it doesn't exist, but that could lead to strange bugs when a user has a really old or
+    // really new SystemVersion title installed. Always using the synthesized one ensures
+    // consistency (currently reports as 5.1.0-0.0)
+    const auto archive = FileSys::SystemArchive::SystemVersion();
+
+    const auto early_exit_failure = [&ctx](const std::string& desc, ResultCode code) {
+        LOG_ERROR(Service_SET, "General failure while attempting to resolve firmware version ({}).",
+                  desc.c_str());
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(code);
+    };
+
+    if (archive == nullptr) {
+        early_exit_failure("The system version archive couldn't be synthesized.",
+                           FileSys::ERROR_FAILED_MOUNT_ARCHIVE);
+        return;
+    }
+
+    const auto ver_file = archive->GetFile("file");
+    if (ver_file == nullptr) {
+        early_exit_failure("The system version archive didn't contain the file 'file'.",
+                           FileSys::ERROR_INVALID_ARGUMENT);
+        return;
+    }
+
+    auto data = ver_file->ReadAllBytes();
+    if (data.size() != 0x100) {
+        early_exit_failure("The system version file 'file' was not the correct size.",
+                           FileSys::ERROR_OUT_OF_BOUNDS);
+        return;
+    }
+
+    // If the command is GetFirmwareVersion (as opposed to GetFirmwareVersion2), hardware will
+    // zero out the REVISION_MINOR field.
+    if (type == GetFirmwareVersionType::Version1) {
+        data[SYSTEM_VERSION_FILE_MINOR_REVISION_OFFSET] = 0;
+    }
+
+    ctx.WriteBuffer(data);
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+}
+} // Anonymous namespace
+
+void SET_SYS::GetFirmwareVersion(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+    GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version1);
+}
+
+void SET_SYS::GetFirmwareVersion2(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_SET, "called");
+    GetFirmwareVersionImpl(ctx, GetFirmwareVersionType::Version2);
+}
+
 void SET_SYS::GetColorSetId(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_SET, "called");
 
@@ -33,8 +108,8 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
         {0, nullptr, "SetLanguageCode"},
         {1, nullptr, "SetNetworkSettings"},
         {2, nullptr, "GetNetworkSettings"},
-        {3, nullptr, "GetFirmwareVersion"},
-        {4, nullptr, "GetFirmwareVersion2"},
+        {3, &SET_SYS::GetFirmwareVersion, "GetFirmwareVersion"},
+        {4, &SET_SYS::GetFirmwareVersion2, "GetFirmwareVersion2"},
         {5, nullptr, "GetFirmwareVersionDigest"},
         {7, nullptr, "GetLockScreenFlag"},
         {8, nullptr, "SetLockScreenFlag"},
diff --git a/src/core/hle/service/set/set_sys.h b/src/core/hle/service/set/set_sys.h
index f602f3c77..13ee2cf46 100644
--- a/src/core/hle/service/set/set_sys.h
+++ b/src/core/hle/service/set/set_sys.h
@@ -20,6 +20,8 @@ private:
         BasicBlack = 1,
     };
 
+    void GetFirmwareVersion(Kernel::HLERequestContext& ctx);
+    void GetFirmwareVersion2(Kernel::HLERequestContext& ctx);
     void GetColorSetId(Kernel::HLERequestContext& ctx);
     void SetColorSetId(Kernel::HLERequestContext& ctx);
 
diff --git a/src/core/hle/service/sockets/sfdnsres.cpp b/src/core/hle/service/sockets/sfdnsres.cpp
index 13ab1d31e..852e71e4b 100644
--- a/src/core/hle/service/sockets/sfdnsres.cpp
+++ b/src/core/hle/service/sockets/sfdnsres.cpp
@@ -8,12 +8,20 @@
 namespace Service::Sockets {
 
 void SFDNSRES::GetAddrInfo(Kernel::HLERequestContext& ctx) {
+    struct Parameters {
+        u8 use_nsd_resolve;
+        u32 unknown;
+        u64 process_id;
+    };
+
     IPC::RequestParser rp{ctx};
+    const auto parameters = rp.PopRaw<Parameters>();
 
-    LOG_WARNING(Service, "(STUBBED) called");
+    LOG_WARNING(Service,
+                "(STUBBED) called. use_nsd_resolve={}, unknown=0x{:08X}, process_id=0x{:016X}",
+                parameters.use_nsd_resolve, parameters.unknown, parameters.process_id);
 
     IPC::ResponseBuilder rb{ctx, 2};
-
     rb.Push(RESULT_SUCCESS);
 }
 
diff --git a/src/core/hle/service/spl/module.cpp b/src/core/hle/service/spl/module.cpp
index 8db0c2f13..e724d4ab8 100644
--- a/src/core/hle/service/spl/module.cpp
+++ b/src/core/hle/service/spl/module.cpp
@@ -26,9 +26,7 @@ Module::Interface::~Interface() = default;
 void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_SPL, "called");
 
-    IPC::RequestParser rp{ctx};
-
-    std::size_t size = ctx.GetWriteBufferSize();
+    const std::size_t size = ctx.GetWriteBufferSize();
 
     std::uniform_int_distribution<u16> distribution(0, std::numeric_limits<u8>::max());
     std::vector<u8> data(size);
diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp
index af40a1815..f7f87a958 100644
--- a/src/core/hle/service/ssl/ssl.cpp
+++ b/src/core/hle/service/ssl/ssl.cpp
@@ -64,13 +64,19 @@ public:
         };
         RegisterHandlers(functions);
     }
-    ~ISslContext() = default;
 
 private:
     void SetOption(Kernel::HLERequestContext& ctx) {
-        LOG_WARNING(Service_SSL, "(STUBBED) called");
+        struct Parameters {
+            u8 enable;
+            u32 option;
+        };
 
         IPC::RequestParser rp{ctx};
+        const auto parameters = rp.PopRaw<Parameters>();
+
+        LOG_WARNING(Service_SSL, "(STUBBED) called. enable={}, option={}", parameters.enable,
+                    parameters.option);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 566cd6006..4e17249a9 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -498,7 +498,6 @@ public:
         };
         RegisterHandlers(functions);
     }
-    ~IHOSBinderDriver() = default;
 
 private:
     enum class TransactionId {
@@ -692,7 +691,6 @@ public:
         };
         RegisterHandlers(functions);
     }
-    ~ISystemDisplayService() = default;
 
 private:
     void SetLayerZ(Kernel::HLERequestContext& ctx) {
@@ -818,7 +816,6 @@ public:
         };
         RegisterHandlers(functions);
     }
-    ~IManagerDisplayService() = default;
 
 private:
     void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -884,7 +881,6 @@ private:
 class IApplicationDisplayService final : public ServiceFramework<IApplicationDisplayService> {
 public:
     explicit IApplicationDisplayService(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
-    ~IApplicationDisplayService() = default;
 
 private:
     enum class ConvertedScaleMode : u64 {
@@ -1037,7 +1033,6 @@ private:
     void ListDisplays(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_VI, "(STUBBED) called");
 
-        IPC::RequestParser rp{ctx};
         DisplayInfo display_info;
         display_info.width *= static_cast<u64>(Settings::values.resolution_factor);
         display_info.height *= static_cast<u64>(Settings::values.resolution_factor);
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6057c7f26..46ac372f6 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "common/file_util.h"
 #include "common/logging/log.h"
+#include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/loader/elf.h"
@@ -340,7 +341,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
     }
 
     codeset.entrypoint = base_addr + header->e_entry;
-    codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image));
+    codeset.memory = std::move(program_image);
 
     LOG_DEBUG(Loader, "Done loading.");
 
diff --git a/src/core/loader/linker.cpp b/src/core/loader/linker.cpp
deleted file mode 100644
index 57ca8c3ee..000000000
--- a/src/core/loader/linker.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <vector>
-
-#include "common/common_funcs.h"
-#include "common/logging/log.h"
-#include "common/swap.h"
-#include "core/loader/linker.h"
-#include "core/memory.h"
-
-namespace Loader {
-
-enum class RelocationType : u32 { ABS64 = 257, GLOB_DAT = 1025, JUMP_SLOT = 1026, RELATIVE = 1027 };
-
-enum DynamicType : u32 {
-    DT_NULL = 0,
-    DT_PLTRELSZ = 2,
-    DT_STRTAB = 5,
-    DT_SYMTAB = 6,
-    DT_RELA = 7,
-    DT_RELASZ = 8,
-    DT_STRSZ = 10,
-    DT_JMPREL = 23,
-};
-
-struct Elf64_Rela {
-    u64_le offset;
-    RelocationType type;
-    u32_le symbol;
-    s64_le addend;
-};
-static_assert(sizeof(Elf64_Rela) == 0x18, "Elf64_Rela has incorrect size.");
-
-struct Elf64_Dyn {
-    u64_le tag;
-    u64_le value;
-};
-static_assert(sizeof(Elf64_Dyn) == 0x10, "Elf64_Dyn has incorrect size.");
-
-struct Elf64_Sym {
-    u32_le name;
-    INSERT_PADDING_BYTES(0x2);
-    u16_le shndx;
-    u64_le value;
-    u64_le size;
-};
-static_assert(sizeof(Elf64_Sym) == 0x18, "Elf64_Sym has incorrect size.");
-
-void Linker::WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
-                              u64 relocation_offset, u64 size, VAddr load_base) {
-    for (u64 i = 0; i < size; i += sizeof(Elf64_Rela)) {
-        Elf64_Rela rela;
-        std::memcpy(&rela, &program_image[relocation_offset + i], sizeof(Elf64_Rela));
-
-        const Symbol& symbol = symbols[rela.symbol];
-        switch (rela.type) {
-        case RelocationType::RELATIVE: {
-            const u64 value = load_base + rela.addend;
-            if (!symbol.name.empty()) {
-                exports[symbol.name] = value;
-            }
-            std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
-            break;
-        }
-        case RelocationType::JUMP_SLOT:
-        case RelocationType::GLOB_DAT:
-            if (!symbol.value) {
-                imports[symbol.name] = {rela.offset + load_base, 0};
-            } else {
-                exports[symbol.name] = symbol.value;
-                std::memcpy(&program_image[rela.offset], &symbol.value, sizeof(u64));
-            }
-            break;
-        case RelocationType::ABS64:
-            if (!symbol.value) {
-                imports[symbol.name] = {rela.offset + load_base, rela.addend};
-            } else {
-                const u64 value = symbol.value + rela.addend;
-                exports[symbol.name] = value;
-                std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
-            }
-            break;
-        default:
-            LOG_CRITICAL(Loader, "Unknown relocation type: {}", static_cast<int>(rela.type));
-            break;
-        }
-    }
-}
-
-void Linker::Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base) {
-    std::map<u64, u64> dynamic;
-    while (dynamic_section_offset < program_image.size()) {
-        Elf64_Dyn dyn;
-        std::memcpy(&dyn, &program_image[dynamic_section_offset], sizeof(Elf64_Dyn));
-        dynamic_section_offset += sizeof(Elf64_Dyn);
-
-        if (dyn.tag == DT_NULL) {
-            break;
-        }
-        dynamic[dyn.tag] = dyn.value;
-    }
-
-    u64 offset = dynamic[DT_SYMTAB];
-    std::vector<Symbol> symbols;
-    while (offset < program_image.size()) {
-        Elf64_Sym sym;
-        std::memcpy(&sym, &program_image[offset], sizeof(Elf64_Sym));
-        offset += sizeof(Elf64_Sym);
-
-        if (sym.name >= dynamic[DT_STRSZ]) {
-            break;
-        }
-
-        std::string name = reinterpret_cast<char*>(&program_image[dynamic[DT_STRTAB] + sym.name]);
-        if (sym.value) {
-            exports[name] = load_base + sym.value;
-            symbols.emplace_back(std::move(name), load_base + sym.value);
-        } else {
-            symbols.emplace_back(std::move(name), 0);
-        }
-    }
-
-    if (dynamic.find(DT_RELA) != dynamic.end()) {
-        WriteRelocations(program_image, symbols, dynamic[DT_RELA], dynamic[DT_RELASZ], load_base);
-    }
-
-    if (dynamic.find(DT_JMPREL) != dynamic.end()) {
-        WriteRelocations(program_image, symbols, dynamic[DT_JMPREL], dynamic[DT_PLTRELSZ],
-                         load_base);
-    }
-}
-
-void Linker::ResolveImports() {
-    // Resolve imports
-    for (const auto& import : imports) {
-        const auto& search = exports.find(import.first);
-        if (search != exports.end()) {
-            Memory::Write64(import.second.ea, search->second + import.second.addend);
-        } else {
-            LOG_ERROR(Loader, "Unresolved import: {}", import.first);
-        }
-    }
-}
-
-} // namespace Loader
diff --git a/src/core/loader/linker.h b/src/core/loader/linker.h
deleted file mode 100644
index 107625837..000000000
--- a/src/core/loader/linker.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <map>
-#include <string>
-#include "common/common_types.h"
-
-namespace Loader {
-
-class Linker {
-protected:
-    struct Symbol {
-        Symbol(std::string&& name, u64 value) : name(std::move(name)), value(value) {}
-        std::string name;
-        u64 value;
-    };
-
-    struct Import {
-        VAddr ea;
-        s64 addend;
-    };
-
-    void WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
-                          u64 relocation_offset, u64 size, VAddr load_base);
-    void Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base);
-
-    void ResolveImports();
-
-    std::map<std::string, Import> imports;
-    std::map<std::string, VAddr> exports;
-};
-
-} // namespace Loader
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 4fad0c0dd..31e4a0c84 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -14,6 +14,7 @@
 #include "core/file_sys/romfs_factory.h"
 #include "core/file_sys/vfs_offset.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/hle/service/filesystem/filesystem.h"
@@ -186,7 +187,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
     program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
 
     // Load codeset for current process
-    codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image));
+    codeset.memory = std::move(program_image);
     process.LoadModule(std::move(codeset), load_base);
 
     // Register module with GDBStub
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 013d629c0..85b0ed644 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -4,10 +4,10 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <vector>
 #include "common/common_types.h"
-#include "core/loader/linker.h"
 #include "core/loader/loader.h"
 
 namespace FileSys {
@@ -21,7 +21,7 @@ class Process;
 namespace Loader {
 
 /// Loads an NRO file
-class AppLoader_NRO final : public AppLoader, Linker {
+class AppLoader_NRO final : public AppLoader {
 public:
     explicit AppLoader_NRO(FileSys::VirtualFile file);
     ~AppLoader_NRO() override;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 6ded0b707..ffe2eea8a 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -4,13 +4,17 @@
 
 #include <cinttypes>
 #include <vector>
-#include <lz4.h>
+
 #include "common/common_funcs.h"
 #include "common/file_util.h"
+#include "common/hex_util.h"
 #include "common/logging/log.h"
+#include "common/lz4_compression.h"
 #include "common/swap.h"
+#include "core/core.h"
 #include "core/file_sys/patch_manager.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/loader/nso.h"
@@ -18,36 +22,8 @@
 #include "core/settings.h"
 
 namespace Loader {
-
-struct NsoSegmentHeader {
-    u32_le offset;
-    u32_le location;
-    u32_le size;
-    union {
-        u32_le alignment;
-        u32_le bss_size;
-    };
-};
-static_assert(sizeof(NsoSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect size.");
-
-struct NsoHeader {
-    u32_le magic;
-    u32_le version;
-    INSERT_PADDING_WORDS(1);
-    u8 flags;
-    std::array<NsoSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
-    std::array<u8, 0x20> build_id;
-    std::array<u32_le, 3> segments_compressed_size;
-
-    bool IsSegmentCompressed(size_t segment_num) const {
-        ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
-        return ((flags >> segment_num) & 1);
-    }
-};
-static_assert(sizeof(NsoHeader) == 0x6c, "NsoHeader has incorrect size.");
-static_assert(std::is_trivially_copyable_v<NsoHeader>, "NsoHeader isn't trivially copyable.");
-
-struct ModHeader {
+namespace {
+struct MODHeader {
     u32_le magic;
     u32_le dynamic_offset;
     u32_le bss_start_offset;
@@ -56,7 +32,28 @@ struct ModHeader {
     u32_le eh_frame_hdr_end_offset;
     u32_le module_offset; // Offset to runtime-generated module object. typically equal to .bss base
 };
-static_assert(sizeof(ModHeader) == 0x1c, "ModHeader has incorrect size.");
+static_assert(sizeof(MODHeader) == 0x1c, "MODHeader has incorrect size.");
+
+/// Decompresses an LZ4-compressed NSO segment; asserts the output is exactly header.size bytes.
+std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
+                                  const NSOSegmentHeader& header) {
+    std::vector<u8> output = Common::Compression::DecompressDataLZ4(compressed_data, header.size);
+
+    ASSERT_MSG(output.size() == static_cast<std::size_t>(header.size), "{} != {}", header.size,
+               output.size());
+
+    return output;
+}
+
+constexpr u32 PageAlignSize(u32 size) {
+    return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
+}
+} // Anonymous namespace
+
+bool NSOHeader::IsSegmentCompressed(size_t segment_num) const {
+    ASSERT_MSG(segment_num < 3, "Invalid segment {}", segment_num);
+    return ((flags >> segment_num) & 1) != 0;
+}
 
 AppLoader_NSO::AppLoader_NSO(FileSys::VirtualFile file) : AppLoader(std::move(file)) {}
 
@@ -73,38 +70,22 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::NSO;
 }
 
-static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
-                                         const NsoSegmentHeader& header) {
-    std::vector<u8> uncompressed_data(header.size);
-    const int bytes_uncompressed =
-        LZ4_decompress_safe(reinterpret_cast<const char*>(compressed_data.data()),
-                            reinterpret_cast<char*>(uncompressed_data.data()),
-                            static_cast<int>(compressed_data.size()), header.size);
-
-    ASSERT_MSG(bytes_uncompressed == static_cast<int>(header.size) &&
-                   bytes_uncompressed == static_cast<int>(uncompressed_data.size()),
-               "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
-
-    return uncompressed_data;
-}
-
-static constexpr u32 PageAlignSize(u32 size) {
-    return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
-}
-
 std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
                                                const FileSys::VfsFile& file, VAddr load_base,
                                                bool should_pass_arguments,
                                                std::optional<FileSys::PatchManager> pm) {
-    if (file.GetSize() < sizeof(NsoHeader))
+    if (file.GetSize() < sizeof(NSOHeader)) {
         return {};
+    }
 
-    NsoHeader nso_header{};
-    if (sizeof(NsoHeader) != file.ReadObject(&nso_header))
+    NSOHeader nso_header{};
+    if (sizeof(NSOHeader) != file.ReadObject(&nso_header)) {
         return {};
+    }
 
-    if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+    if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0')) {
         return {};
+    }
 
     // Build program image
     Kernel::CodeSet codeset;
@@ -140,10 +121,10 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
     std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));
 
     // Read MOD header
-    ModHeader mod_header{};
+    MODHeader mod_header{};
     // Default .bss to size in segment header if MOD0 section doesn't exist
     u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
-    std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader));
+    std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(MODHeader));
     const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
     if (has_mod_header) {
         // Resize program image to include .bss section and page align each section
@@ -155,17 +136,29 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
 
     // Apply patches if necessary
     if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) {
-        std::vector<u8> pi_header(program_image.size() + 0x100);
-        std::memcpy(pi_header.data(), &nso_header, sizeof(NsoHeader));
-        std::memcpy(pi_header.data() + 0x100, program_image.data(), program_image.size());
+        // Build exactly header + image; pre-sizing the vector before inserting would leave extra
+        // zero bytes at the end and make the copy back into program_image below overflow it.
+        std::vector<u8> pi_header(reinterpret_cast<u8*>(&nso_header),
+                                  reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
+        pi_header.insert(pi_header.end(), program_image.begin(), program_image.end());
 
         pi_header = pm->PatchNSO(pi_header);
 
-        std::memcpy(program_image.data(), pi_header.data() + 0x100, program_image.size());
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
+    }
+
+    // Apply cheats if they exist and the program has a valid title ID
+    if (pm) {
+        auto& system = Core::System::GetInstance();
+        const auto cheats = pm->CreateCheatList(system, nso_header.build_id);
+        if (!cheats.empty()) {
+            system.RegisterCheatList(cheats, Common::HexArrayToString(nso_header.build_id),
+                                     load_base, load_base + program_image.size());
+        }
     }
 
     // Load codeset for current process
-    codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image));
+    codeset.memory = std::move(program_image);
     process.LoadModule(std::move(codeset), load_base);
 
     // Register module with GDBStub
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 135b6ea5a..4674c3724 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -4,10 +4,12 @@
 
 #pragma once
 
+#include <array>
 #include <optional>
+#include <type_traits>
 #include "common/common_types.h"
+#include "common/swap.h"
 #include "core/file_sys/patch_manager.h"
-#include "core/loader/linker.h"
 #include "core/loader/loader.h"
 
 namespace Kernel {
@@ -16,6 +18,43 @@ class Process;
 
 namespace Loader {
 
+struct NSOSegmentHeader {
+    u32_le offset;
+    u32_le location;
+    u32_le size;
+    union {
+        u32_le alignment;
+        u32_le bss_size;
+    };
+};
+static_assert(sizeof(NSOSegmentHeader) == 0x10, "NSOSegmentHeader has incorrect size.");
+
+struct NSOHeader {
+    using SHA256Hash = std::array<u8, 0x20>;
+
+    struct RODataRelativeExtent {
+        u32_le data_offset;
+        u32_le size;
+    };
+
+    u32_le magic;
+    u32_le version;
+    u32 reserved;
+    u32_le flags;
+    std::array<NSOSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
+    std::array<u8, 0x20> build_id;
+    std::array<u32_le, 3> segments_compressed_size;
+    std::array<u8, 0x1C> padding;
+    RODataRelativeExtent api_info_extent;
+    RODataRelativeExtent dynstr_extent;
+    RODataRelativeExtent dynsyn_extent;
+    std::array<SHA256Hash, 3> segment_hashes;
+
+    bool IsSegmentCompressed(size_t segment_num) const;
+};
+static_assert(sizeof(NSOHeader) == 0x100, "NSOHeader has incorrect size.");
+static_assert(std::is_trivially_copyable_v<NSOHeader>, "NSOHeader must be trivially copyable.");
+
 constexpr u64 NSO_ARGUMENT_DATA_ALLOCATION_SIZE = 0x9000;
 
 struct NSOArgumentHeader {
@@ -26,7 +65,7 @@ struct NSOArgumentHeader {
 static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size.");
 
 /// Loads an NSO file
-class AppLoader_NSO final : public AppLoader, Linker {
+class AppLoader_NSO final : public AppLoader {
 public:
     explicit AppLoader_NSO(FileSys::VirtualFile file);
 
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h
index d6995b61e..436f7387c 100644
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -22,7 +22,7 @@ class AppLoader_NCA;
 class AppLoader_XCI final : public AppLoader {
 public:
     explicit AppLoader_XCI(FileSys::VirtualFile file);
-    ~AppLoader_XCI();
+    ~AppLoader_XCI() override;
 
     /**
      * Returns the type of the file
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 6591c45d2..4e0538bc2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "common/page_table.h"
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
@@ -18,13 +19,14 @@
 #include "core/hle/lock.h"
 #include "core/memory.h"
 #include "core/memory_setup.h"
+#include "video_core/gpu.h"
 #include "video_core/renderer_base.h"
 
 namespace Memory {
 
-static PageTable* current_page_table = nullptr;
+static Common::PageTable* current_page_table = nullptr;
 
-void SetCurrentPageTable(PageTable* page_table) {
+void SetCurrentPageTable(Common::PageTable* page_table) {
     current_page_table = page_table;
 
     auto& system = Core::System::GetInstance();
@@ -36,39 +38,16 @@ void SetCurrentPageTable(PageTable* page_table) {
     }
 }
 
-PageTable* GetCurrentPageTable() {
-    return current_page_table;
-}
-
-PageTable::PageTable() = default;
-
-PageTable::PageTable(std::size_t address_space_width_in_bits) {
-    Resize(address_space_width_in_bits);
-}
-
-PageTable::~PageTable() = default;
-
-void PageTable::Resize(std::size_t address_space_width_in_bits) {
-    const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
-
-    pointers.resize(num_page_table_entries);
-    attributes.resize(num_page_table_entries);
-
-    // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
-    // vector size is subsequently decreased (via resize), the vector might not automatically
-    // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
-    // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
-
-    pointers.shrink_to_fit();
-    attributes.shrink_to_fit();
-}
-
-static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
+static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
+                     Common::PageType type) {
     LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
               (base + size) * PAGE_SIZE);
 
-    RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
-                                 FlushMode::FlushAndInvalidate);
+    // During boot, current_page_table might not be set yet, in which case we need not flush
+    if (Core::System::GetInstance().IsPoweredOn()) {
+        Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
+                                                                   size * PAGE_SIZE);
+    }
 
     VAddr end = base + size;
     ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
@@ -88,41 +67,47 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
     }
 }
 
-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) {
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
 }
 
-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) {
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);
 
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
-void UnmapRegion(PageTable& page_table, VAddr base, u64 size) {
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
     ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
     ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);
 
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
     page_table.special_regions.erase(interval);
 }
 
-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook) {
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook) {
     auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.subtract(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }
 
 /**
@@ -171,19 +156,19 @@ T Read(const VAddr vaddr) {
         return value;
     }
 
-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
     switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
         LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
         return 0;
-    case PageType::Memory:
+    case Common::PageType::Memory:
         ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
         break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush);
-
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
         T value;
-        std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
+        std::memcpy(&value, host_ptr, sizeof(T));
         return value;
     }
     default:
@@ -201,18 +186,19 @@ void Write(const VAddr vaddr, const T data) {
         return;
     }
 
-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
     switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
         LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
                   static_cast<u32>(data), vaddr);
         return;
-    case PageType::Memory:
+    case Common::PageType::Memory:
         ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
         break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
-        std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+        std::memcpy(host_ptr, &data, sizeof(T));
         break;
     }
     default:
@@ -227,10 +213,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
     if (page_pointer)
         return true;
 
-    if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory)
+    if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
         return true;
 
-    if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special)
+    if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
         return false;
 
     return false;
@@ -250,7 +236,8 @@ u8* GetPointer(const VAddr vaddr) {
         return page_pointer + (vaddr & PAGE_MASK);
     }
 
-    if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
+    if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
+        Common::PageType::RasterizerCachedMemory) {
         return GetPointerFromVMA(vaddr);
     }
 
@@ -284,20 +271,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
 
     u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
     for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
-        PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
 
         if (cached) {
             // Switch page type to cached if now cached
             switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                 // It is not necessary for a process to have this region mapped into its address
                 // space, for example, a system module need not have a VRAM mapping.
                 break;
-            case PageType::Memory:
-                page_type = PageType::RasterizerCachedMemory;
+            case Common::PageType::Memory:
+                page_type = Common::PageType::RasterizerCachedMemory;
                 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
                 break;
-            case PageType::RasterizerCachedMemory:
+            case Common::PageType::RasterizerCachedMemory:
                 // There can be more than one GPU region mapped per CPU region, so it's common that
                 // this area is already marked as cached.
                 break;
@@ -307,23 +294,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
         } else {
             // Switch page type to uncached if now uncached
             switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                 // It is not necessary for a process to have this region mapped into its address
                 // space, for example, a system module need not have a VRAM mapping.
                 break;
-            case PageType::Memory:
+            case Common::PageType::Memory:
                 // There can be more than one GPU region mapped per CPU region, so it's common that
                 // this area is already unmarked as cached.
                 break;
-            case PageType::RasterizerCachedMemory: {
+            case Common::PageType::RasterizerCachedMemory: {
                 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
                 if (pointer == nullptr) {
                     // It's possible that this function has been called while updating the pagetable
                     // after unmapping a VMA. In that case the underlying VMA will no longer exist,
                     // and we should just leave the pagetable entry blank.
-                    page_type = PageType::Unmapped;
+                    page_type = Common::PageType::Unmapped;
                 } else {
-                    page_type = PageType::Memory;
+                    page_type = Common::PageType::Memory;
                     current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
                 }
                 break;
@@ -335,47 +322,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
     }
 }
 
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
-    auto& system_instance = Core::System::GetInstance();
-
-    // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
-    // null here
-    if (!system_instance.IsPoweredOn()) {
-        return;
-    }
-
-    const VAddr end = start + size;
-
-    const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
-        if (start >= region_end || end <= region_start) {
-            // No overlap with region
-            return;
-        }
-
-        const VAddr overlap_start = std::max(start, region_start);
-        const VAddr overlap_end = std::min(end, region_end);
-        const VAddr overlap_size = overlap_end - overlap_start;
-
-        auto& gpu = system_instance.GPU();
-        switch (mode) {
-        case FlushMode::Flush:
-            gpu.FlushRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::Invalidate:
-            gpu.InvalidateRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::FlushAndInvalidate:
-            gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
-            break;
-        }
-    };
-
-    const auto& vm_manager = Core::CurrentProcess()->VMManager();
-
-    CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
-    CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
-}
-
 u8 Read8(const VAddr addr) {
     return Read<u8>(addr);
 }
@@ -406,24 +352,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, src_addr, size);
             std::memset(dest_buffer, 0, copy_amount);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             const u8* src_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_buffer, src_ptr, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(dest_buffer, host_ptr, copy_amount);
             break;
         }
         default:
@@ -470,23 +416,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, dest_addr, size);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             u8* dest_ptr = page_table.pointers[page_index] + page_offset;
             std::memcpy(dest_ptr, src_buffer, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(host_ptr, src_buffer, copy_amount);
             break;
         }
         default:
@@ -516,23 +462,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, dest_addr, size);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
             u8* dest_ptr = page_table.pointers[page_index] + page_offset;
             std::memset(dest_ptr, 0, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memset(host_ptr, 0, copy_amount);
             break;
         }
         default:
@@ -558,23 +504,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
             LOG_ERROR(HW_Memory,
                       "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                       current_vaddr, src_addr, size);
             ZeroBlock(process, dest_addr, copy_amount);
             break;
         }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
             const u8* src_ptr = page_table.pointers[page_index] + page_offset;
             WriteBlock(process, dest_addr, src_ptr, copy_amount);
             break;
         }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            WriteBlock(process, dest_addr, host_ptr, copy_amount);
             break;
         }
         default:
diff --git a/src/core/memory.h b/src/core/memory.h
index 1acf5ce8c..6845f5fe1 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -6,11 +6,11 @@
 
 #include <cstddef>
 #include <string>
-#include <tuple>
-#include <vector>
-#include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
-#include "core/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}
 
 namespace Kernel {
 class Process;
@@ -26,83 +26,8 @@ constexpr std::size_t PAGE_BITS = 12;
 constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
 constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
 
-enum class PageType : u8 {
-    /// Page is unmapped and should cause an access error.
-    Unmapped,
-    /// Page is mapped to regular memory. This is the only type you can get pointers to.
-    Memory,
-    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
-    /// invalidation
-    RasterizerCachedMemory,
-    /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
-    Special,
-};
-
-struct SpecialRegion {
-    enum class Type {
-        DebugHook,
-        IODevice,
-    } type;
-
-    MemoryHookPointer handler;
-
-    bool operator<(const SpecialRegion& other) const {
-        return std::tie(type, handler) < std::tie(other.type, other.handler);
-    }
-
-    bool operator==(const SpecialRegion& other) const {
-        return std::tie(type, handler) == std::tie(other.type, other.handler);
-    }
-};
-
-/**
- * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
- * mimics the way a real CPU page table works.
- */
-struct PageTable {
-    explicit PageTable();
-    explicit PageTable(std::size_t address_space_width_in_bits);
-    ~PageTable();
-
-    /**
-     * Resizes the page table to be able to accomodate enough pages within
-     * a given address space.
-     *
-     * @param address_space_width_in_bits The address size width in bits.
-     */
-    void Resize(std::size_t address_space_width_in_bits);
-
-    /**
-     * Vector of memory pointers backing each page. An entry can only be non-null if the
-     * corresponding entry in the `attributes` vector is of type `Memory`.
-     */
-    std::vector<u8*> pointers;
-
-    /**
-     * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
-     * of type `Special`.
-     */
-    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
-
-    /**
-     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
-     * the corresponding entry in `pointers` MUST be set to null.
-     */
-    std::vector<PageType> attributes;
-};
-
 /// Virtual user-space memory regions
 enum : VAddr {
-    /// Read-only page containing kernel and system configuration values.
-    CONFIG_MEMORY_VADDR = 0x1FF80000,
-    CONFIG_MEMORY_SIZE = 0x00001000,
-    CONFIG_MEMORY_VADDR_END = CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE,
-
-    /// Usually read-only page containing mostly values read from hardware.
-    SHARED_PAGE_VADDR = 0x1FF81000,
-    SHARED_PAGE_SIZE = 0x00001000,
-    SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
-
     /// TLS (Thread-Local Storage) related.
     TLS_ENTRY_SIZE = 0x200,
 
@@ -115,9 +40,8 @@ enum : VAddr {
     KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
 };
 
-/// Currently active page table
-void SetCurrentPageTable(PageTable* page_table);
-PageTable* GetCurrentPageTable();
+/// Changes the currently active page table.
+void SetCurrentPageTable(Common::PageTable* page_table);
 
 /// Determines if the given VAddr is valid for the specified process.
 bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +85,4 @@ enum class FlushMode {
  */
 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
 
-/**
- * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
- * address region.
- */
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
-
 } // namespace Memory
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 9a1a4f4be..5225ee8e2 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "core/memory_hook.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}
 
 namespace Memory {
 
@@ -17,7 +21,7 @@ namespace Memory {
  * @param size The amount of bytes to map. Must be page-aligned.
  * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
  */
-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
 
 /**
  * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
  * @param size The amount of bytes to map. Must be page-aligned.
  * @param mmio_handler The handler that backs the mapping.
  */
-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler);
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler);
 
-void UnmapRegion(PageTable& page_table, VAddr base, u64 size);
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
 
-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook);
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook);
 
 } // namespace Memory
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index c716a462b..4afd6c8a3 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -18,13 +18,13 @@ using std::chrono::microseconds;
 namespace Core {
 
 void PerfStats::BeginSystemFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     frame_begin = Clock::now();
 }
 
 void PerfStats::EndSystemFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     auto frame_end = Clock::now();
     accumulated_frametime += frame_end - frame_begin;
@@ -35,13 +35,13 @@ void PerfStats::EndSystemFrame() {
 }
 
 void PerfStats::EndGameFrame() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     game_frames += 1;
 }
 
 PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     const auto now = Clock::now();
     // Walltime elapsed since stats were reset
@@ -67,7 +67,7 @@ PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us
 }
 
 double PerfStats::GetLastFrameTimeScale() {
-    std::lock_guard<std::mutex> lock(object_mutex);
+    std::lock_guard lock{object_mutex};
 
     constexpr double FRAME_LENGTH = 1.0 / 60;
     return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 6dd3139cc..6d32ebea3 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -82,7 +82,6 @@ void LogSetting(const std::string& name, const T& value) {
 void LogSettings() {
     LOG_INFO(Config, "yuzu Configuration:");
     LogSetting("System_UseDockedMode", Settings::values.use_docked_mode);
-    LogSetting("System_EnableNfc", Settings::values.enable_nfc);
     LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
     LogSetting("System_CurrentUser", Settings::values.current_user);
     LogSetting("System_LanguageIndex", Settings::values.language_index);
diff --git a/src/core/settings.h b/src/core/settings.h
index cdfb2f742..d543eb32f 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -349,7 +349,6 @@ struct TouchscreenInput {
 struct Values {
     // System
     bool use_docked_mode;
-    bool enable_nfc;
     std::optional<u32> rng_seed;
     // Measured in seconds since epoch
     std::optional<std::chrono::seconds> custom_rtc;
diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp
index 525fe6abc..078374be5 100644
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -36,18 +36,18 @@ struct KeyButtonPair {
 class KeyButtonList {
 public:
     void AddKeyButton(int key_code, KeyButton* key_button) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         list.push_back(KeyButtonPair{key_code, key_button});
     }
 
     void RemoveKeyButton(const KeyButton* key_button) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         list.remove_if(
             [key_button](const KeyButtonPair& pair) { return pair.key_button == key_button; });
     }
 
     void ChangeKeyStatus(int key_code, bool pressed) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         for (const KeyButtonPair& pair : list) {
             if (pair.key_code == key_code)
                 pair.key_button->status.store(pressed);
@@ -55,7 +55,7 @@ public:
     }
 
     void ChangeAllKeyStatus(bool pressed) {
-        std::lock_guard<std::mutex> guard(mutex);
+        std::lock_guard guard{mutex};
         for (const KeyButtonPair& pair : list) {
             pair.key_button->status.store(pressed);
         }
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 6d96d4019..868251628 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -39,7 +39,7 @@ public:
     void Tilt(int x, int y) {
         auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
         if (is_tilting) {
-            std::lock_guard<std::mutex> guard(tilt_mutex);
+            std::lock_guard guard{tilt_mutex};
             if (mouse_move.x == 0 && mouse_move.y == 0) {
                 tilt_angle = 0;
             } else {
@@ -51,13 +51,13 @@ public:
     }
 
     void EndTilt() {
-        std::lock_guard<std::mutex> guard(tilt_mutex);
+        std::lock_guard guard{tilt_mutex};
         tilt_angle = 0;
         is_tilting = false;
     }
 
     std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
-        std::lock_guard<std::mutex> guard(status_mutex);
+        std::lock_guard guard{status_mutex};
         return status;
     }
 
@@ -93,7 +93,7 @@ private:
             old_q = q;
 
             {
-                std::lock_guard<std::mutex> guard(tilt_mutex);
+                std::lock_guard guard{tilt_mutex};
 
                 // Find the quaternion describing current 3DS tilting
                 q = Common::MakeQuaternion(
@@ -115,7 +115,7 @@ private:
 
             // Update the sensor state
             {
-                std::lock_guard<std::mutex> guard(status_mutex);
+                std::lock_guard guard{status_mutex};
                 status = std::make_tuple(gravity, angular_rate);
             }
         }
diff --git a/src/input_common/sdl/sdl.h b/src/input_common/sdl/sdl.h
index 02a8d2e2c..d7f24c68a 100644
--- a/src/input_common/sdl/sdl.h
+++ b/src/input_common/sdl/sdl.h
@@ -24,17 +24,19 @@ namespace InputCommon::SDL {
 
 class State {
 public:
-    /// Unresisters SDL device factories and shut them down.
+    using Pollers = std::vector<std::unique_ptr<Polling::DevicePoller>>;
+
+    /// Unregisters SDL device factories and shuts them down.
     virtual ~State() = default;
 
-    virtual std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
-        InputCommon::Polling::DeviceType type) = 0;
+    virtual Pollers GetPollers(Polling::DeviceType type) = 0;
 };
 
 class NullState : public State {
 public:
-    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
-        InputCommon::Polling::DeviceType type) override {}
+    Pollers GetPollers(Polling::DeviceType type) override {
+        return {};
+    }
 };
 
 std::unique_ptr<State> Init();
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
index 934339d3b..5949ecbae 100644
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -55,22 +55,22 @@ public:
         : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
 
     void SetButton(int button, bool value) {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         state.buttons[button] = value;
     }
 
     bool GetButton(int button) const {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         return state.buttons.at(button);
     }
 
     void SetAxis(int axis, Sint16 value) {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         state.axes[axis] = value;
     }
 
     float GetAxis(int axis) const {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         return state.axes.at(axis) / 32767.0f;
     }
 
@@ -92,12 +92,12 @@ public:
     }
 
     void SetHat(int hat, Uint8 direction) {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         state.hats[hat] = direction;
     }
 
     bool GetHatDirection(int hat, Uint8 direction) const {
-        std::lock_guard<std::mutex> lock(mutex);
+        std::lock_guard lock{mutex};
         return (state.hats.at(hat) & direction) != 0;
     }
     /**
@@ -140,7 +140,7 @@ private:
  * Get the nth joystick with the corresponding GUID
  */
 std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+    std::lock_guard lock{joystick_map_mutex};
     const auto it = joystick_map.find(guid);
     if (it != joystick_map.end()) {
         while (it->second.size() <= port) {
@@ -161,7 +161,8 @@ std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& g
 std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
     auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
     const std::string guid = GetGUID(sdl_joystick);
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+
+    std::lock_guard lock{joystick_map_mutex};
     auto map_it = joystick_map.find(guid);
     if (map_it != joystick_map.end()) {
         auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
@@ -198,8 +199,9 @@ void SDLState::InitJoystick(int joystick_index) {
         LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
         return;
     }
-    std::string guid = GetGUID(sdl_joystick);
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+    const std::string guid = GetGUID(sdl_joystick);
+
+    std::lock_guard lock{joystick_map_mutex};
     if (joystick_map.find(guid) == joystick_map.end()) {
         auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
         joystick_map[guid].emplace_back(std::move(joystick));
@@ -221,7 +223,7 @@ void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) {
     std::string guid = GetGUID(sdl_joystick);
     std::shared_ptr<SDLJoystick> joystick;
     {
-        std::lock_guard<std::mutex> lock(joystick_map_mutex);
+        std::lock_guard lock{joystick_map_mutex};
         // This call to guid is safe since the joystick is guaranteed to be in the map
         auto& joystick_guid_list = joystick_map[guid];
         const auto joystick_it =
@@ -274,7 +276,7 @@ void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
 }
 
 void SDLState::CloseJoysticks() {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+    std::lock_guard lock{joystick_map_mutex};
     joystick_map.clear();
 }
 
@@ -475,12 +477,11 @@ SDLState::SDLState() {
 
     initialized = true;
     if (start_thread) {
-        poll_thread = std::thread([&] {
+        poll_thread = std::thread([this] {
             using namespace std::chrono_literals;
-            SDL_Event event;
             while (initialized) {
                 SDL_PumpEvents();
-                std::this_thread::sleep_for(std::chrono::duration(10ms));
+                std::this_thread::sleep_for(10ms);
             }
         });
     }
@@ -651,9 +652,9 @@ private:
 };
 } // namespace Polling
 
-std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> SDLState::GetPollers(
-    InputCommon::Polling::DeviceType type) {
-    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
+SDLState::Pollers SDLState::GetPollers(InputCommon::Polling::DeviceType type) {
+    Pollers pollers;
+
     switch (type) {
     case InputCommon::Polling::DeviceType::Analog:
         pollers.emplace_back(std::make_unique<Polling::SDLAnalogPoller>(*this));
@@ -661,8 +662,9 @@ std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> SDLState::GetPo
     case InputCommon::Polling::DeviceType::Button:
         pollers.emplace_back(std::make_unique<Polling::SDLButtonPoller>(*this));
         break;
-        return pollers;
     }
+
+    return pollers;
 }
 
 } // namespace SDL
diff --git a/src/input_common/sdl/sdl_impl.h b/src/input_common/sdl/sdl_impl.h
index fec82fbe6..2579741d6 100644
--- a/src/input_common/sdl/sdl_impl.h
+++ b/src/input_common/sdl/sdl_impl.h
@@ -25,7 +25,7 @@ public:
     /// Initializes and registers SDL device factories
     SDLState();
 
-    /// Unresisters SDL device factories and shut them down.
+    /// Unregisters SDL device factories and shuts them down.
     ~SDLState() override;
 
     /// Handle SDL_Events for joysticks from SDL_PollEvent
@@ -35,8 +35,7 @@ public:
     std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port);
 
     /// Get all DevicePoller that use the SDL backend for a specific device type
-    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
-        InputCommon::Polling::DeviceType type) override;
+    Pollers GetPollers(Polling::DeviceType type) override;
 
     /// Used by the Pollers during config
     std::atomic<bool> polling = false;
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 37f09ce5f..c7038b217 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,4 +1,7 @@
 add_executable(tests
+    common/bit_field.cpp
+    common/bit_utils.cpp
+    common/multi_level_queue.cpp
     common/param_package.cpp
     common/ring_buffer.cpp
     core/arm/arm_test_common.cpp
diff --git a/src/tests/common/bit_field.cpp b/src/tests/common/bit_field.cpp
new file mode 100644
index 000000000..8ca1889f9
--- /dev/null
+++ b/src/tests/common/bit_field.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <cstring>
+#include <type_traits>
+#include <catch2/catch.hpp>
+#include "common/bit_field.h"
+
+TEST_CASE("BitField", "[common]") {
+    enum class TestEnum : u32 {
+        A = 0b10111101,
+        B = 0b10101110,
+        C = 0b00001111,
+    };
+
+    union LEBitField {
+        u32_le raw;
+        BitField<0, 6, u32> a;
+        BitField<6, 4, s32> b;
+        BitField<10, 8, TestEnum> c;
+        BitField<18, 14, u32> d;
+    } le_bitfield;
+
+    union BEBitField {
+        u32_be raw;
+        BitFieldBE<0, 6, u32> a;
+        BitFieldBE<6, 4, s32> b;
+        BitFieldBE<10, 8, TestEnum> c;
+        BitFieldBE<18, 14, u32> d;
+    } be_bitfield;
+
+    static_assert(sizeof(LEBitField) == sizeof(u32));
+    static_assert(sizeof(BEBitField) == sizeof(u32));
+    static_assert(std::is_trivially_copyable_v<LEBitField>);
+    static_assert(std::is_trivially_copyable_v<BEBitField>);
+
+    std::array<u8, 4> raw{{
+        0b01101100,
+        0b11110110,
+        0b10111010,
+        0b11101100,
+    }};
+
+    std::memcpy(&le_bitfield, &raw, sizeof(raw));
+    std::memcpy(&be_bitfield, &raw, sizeof(raw));
+
+    // bit fields: 11101100101110'10111101'1001'101100
+    REQUIRE(le_bitfield.raw == 0b11101100'10111010'11110110'01101100);
+    REQUIRE(le_bitfield.a == 0b101100);
+    REQUIRE(le_bitfield.b == -7); // 1001 as two's complement
+    REQUIRE(le_bitfield.c == TestEnum::A);
+    REQUIRE(le_bitfield.d == 0b11101100101110);
+
+    le_bitfield.a.Assign(0b000111);
+    le_bitfield.b.Assign(-1);
+    le_bitfield.c.Assign(TestEnum::C);
+    le_bitfield.d.Assign(0b01010101010101);
+    std::memcpy(&raw, &le_bitfield, sizeof(raw));
+    // bit fields: 01010101010101'00001111'1111'000111
+    REQUIRE(le_bitfield.raw == 0b01010101'01010100'00111111'11000111);
+    REQUIRE(raw == std::array<u8, 4>{{
+                       0b11000111,
+                       0b00111111,
+                       0b01010100,
+                       0b01010101,
+                   }});
+
+    // bit fields: 01101100111101'10101110'1011'101100
+    REQUIRE(be_bitfield.raw == 0b01101100'11110110'10111010'11101100);
+    REQUIRE(be_bitfield.a == 0b101100);
+    REQUIRE(be_bitfield.b == -5); // 1011 as two's complement
+    REQUIRE(be_bitfield.c == TestEnum::B);
+    REQUIRE(be_bitfield.d == 0b01101100111101);
+
+    be_bitfield.a.Assign(0b000111);
+    be_bitfield.b.Assign(-1);
+    be_bitfield.c.Assign(TestEnum::C);
+    be_bitfield.d.Assign(0b01010101010101);
+    std::memcpy(&raw, &be_bitfield, sizeof(raw));
+    // bit fields: 01010101010101'00001111'1111'000111
+    REQUIRE(be_bitfield.raw == 0b01010101'01010100'00111111'11000111);
+    REQUIRE(raw == std::array<u8, 4>{{
+                       0b01010101,
+                       0b01010100,
+                       0b00111111,
+                       0b11000111,
+                   }});
+}
diff --git a/src/tests/common/bit_utils.cpp b/src/tests/common/bit_utils.cpp
new file mode 100644
index 000000000..479b5995a
--- /dev/null
+++ b/src/tests/common/bit_utils.cpp
@@ -0,0 +1,23 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/bit_util.h"
+
+namespace Common {
+
+TEST_CASE("BitUtils::CountTrailingZeroes", "[common]") {
+    REQUIRE(Common::CountTrailingZeroes32(0) == 32);
+    REQUIRE(Common::CountTrailingZeroes64(0) == 64);
+    REQUIRE(Common::CountTrailingZeroes32(9) == 0);
+    REQUIRE(Common::CountTrailingZeroes32(8) == 3);
+    REQUIRE(Common::CountTrailingZeroes32(0x801000) == 12);
+    REQUIRE(Common::CountTrailingZeroes64(9) == 0);
+    REQUIRE(Common::CountTrailingZeroes64(8) == 3);
+    REQUIRE(Common::CountTrailingZeroes64(0x801000) == 12);
+    REQUIRE(Common::CountTrailingZeroes64(0x801000000000UL) == 36);
+}
+
+} // namespace Common
diff --git a/src/tests/common/multi_level_queue.cpp b/src/tests/common/multi_level_queue.cpp
new file mode 100644
index 000000000..cca7ec7da
--- /dev/null
+++ b/src/tests/common/multi_level_queue.cpp
@@ -0,0 +1,55 @@
+// Copyright 2019 Yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/common_types.h"
+#include "common/multi_level_queue.h"
+
+namespace Common {
+
+TEST_CASE("MultiLevelQueue", "[common]") {
+    std::array<f32, 8> values = {0.0, 5.0, 1.0, 9.0, 8.0, 2.0, 6.0, 7.0};
+    Common::MultiLevelQueue<f32, 64> mlq;
+    REQUIRE(mlq.empty());
+    mlq.add(values[2], 2);
+    mlq.add(values[7], 7);
+    mlq.add(values[3], 3);
+    mlq.add(values[4], 4);
+    mlq.add(values[0], 0);
+    mlq.add(values[5], 5);
+    mlq.add(values[6], 6);
+    mlq.add(values[1], 1);
+    u32 index = 0;
+    bool all_set = true;
+    for (auto& f : mlq) {
+        all_set &= (f == values[index]);
+        index++;
+    }
+    REQUIRE(all_set);
+    REQUIRE(!mlq.empty());
+    f32 v = 8.0;
+    mlq.add(v, 2);
+    v = -7.0;
+    mlq.add(v, 2, false);
+    REQUIRE(mlq.front(2) == -7.0);
+    mlq.yield(2);
+    REQUIRE(mlq.front(2) == values[2]);
+    REQUIRE(mlq.back(2) == -7.0);
+    REQUIRE(mlq.empty(8));
+    v = 10.0;
+    mlq.add(v, 8);
+    mlq.adjust(v, 8, 9);
+    REQUIRE(mlq.front(9) == v);
+    REQUIRE(mlq.empty(8));
+    REQUIRE(!mlq.empty(9));
+    mlq.adjust(values[0], 0, 9);
+    REQUIRE(mlq.highest_priority_set() == 1);
+    REQUIRE(mlq.lowest_priority_set() == 9);
+    mlq.remove(values[1], 1);
+    REQUIRE(mlq.highest_priority_set() == 2);
+    REQUIRE(mlq.empty(1));
+}
+
+} // namespace Common
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 6fe56833d..58af41f6e 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@
 
 #include <algorithm>
 
+#include "common/page_table.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
@@ -16,18 +17,17 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
     : mutable_memory(mutable_memory_),
       test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
     auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
-    kernel.MakeCurrentProcess(process.get());
     page_table = &process->VMManager().page_table;
 
     std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
     page_table->special_regions.clear();
     std::fill(page_table->attributes.begin(), page_table->attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);
 
     Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
     Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
 
-    Memory::SetCurrentPageTable(page_table);
+    kernel.MakeCurrentProcess(process.get());
 }
 
 TestEnvironment::~TestEnvironment() {
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index 0b7539601..d145dbfcc 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/memory_hook.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/memory_hook.h"
 
-namespace Memory {
+namespace Common {
 struct PageTable;
 }
 
@@ -58,7 +58,7 @@ public:
 
 private:
     friend struct TestMemory;
-    struct TestMemory final : Memory::MemoryHook {
+    struct TestMemory final : Common::MemoryHook {
         explicit TestMemory(TestEnvironment* env_) : env(env_) {}
         TestEnvironment* env;
 
@@ -86,7 +86,7 @@ private:
     bool mutable_memory;
     std::shared_ptr<TestMemory> test_memory;
     std::vector<WriteRecord> write_records;
-    Memory::PageTable* page_table = nullptr;
+    Common::PageTable* page_table = nullptr;
     Kernel::KernelCore kernel;
 };
 
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 14b76680f..242a0d1cd 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -128,7 +128,9 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_scheduler.cpp
         renderer_vulkan/vk_scheduler.h
         renderer_vulkan/vk_stream_buffer.cpp
-        renderer_vulkan/vk_stream_buffer.h)
+        renderer_vulkan/vk_stream_buffer.h
+        renderer_vulkan/vk_swapchain.cpp
+        renderer_vulkan/vk_swapchain.h)
 
     target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
@@ -137,4 +139,4 @@ endif()
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad lz4_static)
+target_link_libraries(video_core PRIVATE glad)
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 5ffb492ea..f0ef67535 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -10,7 +10,7 @@ namespace Tegra {
 
 void DebugContext::DoOnEvent(Event event, void* data) {
     {
-        std::unique_lock<std::mutex> lock(breakpoint_mutex);
+        std::unique_lock lock{breakpoint_mutex};
 
         // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
         // show on debug widgets
@@ -32,7 +32,7 @@ void DebugContext::DoOnEvent(Event event, void* data) {
 
 void DebugContext::Resume() {
     {
-        std::lock_guard<std::mutex> lock(breakpoint_mutex);
+        std::lock_guard lock{breakpoint_mutex};
 
         // Tell all observers that we are about to resume
         for (auto& breakpoint_observer : breakpoint_observers) {
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index c235faf46..ac3a2eb01 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -40,7 +40,7 @@ public:
         /// Constructs the object such that it observes events of the given DebugContext.
         explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
             : context_weak(debug_context) {
-            std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex);
+            std::unique_lock lock{debug_context->breakpoint_mutex};
             debug_context->breakpoint_observers.push_back(this);
         }
 
@@ -48,7 +48,7 @@ public:
             auto context = context_weak.lock();
             if (context) {
                 {
-                    std::unique_lock<std::mutex> lock(context->breakpoint_mutex);
+                    std::unique_lock lock{context->breakpoint_mutex};
                     context->breakpoint_observers.remove(this);
                 }
 
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index bff1a37ff..8b1bea1ae 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -55,12 +55,9 @@ bool DmaPusher::Step() {
     }
 
     // Push buffer non-empty, read a word
-    const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
-    ASSERT_MSG(address, "Invalid GPU address");
-
     command_headers.resize(command_list_header.size);
-
-    Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
+    gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
+                                  command_list_header.size * sizeof(u32));
 
     for (const CommandHeader& command_header : command_headers) {
 
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 27a36348c..6ab06518f 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -9,7 +9,6 @@
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
-#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 03b7ee5d8..55966eef1 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -6,12 +6,13 @@
 #include "common/logging/log.h"
 #include "common/math_util.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
 namespace Tegra::Engines {
 
 Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer} {}
+    : rasterizer{rasterizer}, memory_manager{memory_manager} {}
 
 void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
     ASSERT_MSG(method_call.method < Regs::NUM_REGS,
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 80523e320..2e51b7f13 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -10,7 +10,10 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -115,10 +118,9 @@ public:
         };
     } regs{};
 
-    MemoryManager& memory_manager;
-
 private:
     VideoCore::RasterizerInterface& rasterizer;
+    MemoryManager& memory_manager;
 
     /// Performs the copy from the source surface to the destination surface as configured in the
     /// registers.
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 6575afd0f..fb6cdf432 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -9,7 +9,10 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}
 
 namespace Tegra::Engines {
 
@@ -40,10 +43,11 @@ public:
     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
                   "KeplerCompute Regs has wrong size");
 
-    MemoryManager& memory_manager;
-
     /// Write the value to the register identified by method.
     void CallMethod(const GPU::MethodCall& method_call);
+
+private:
+    MemoryManager& memory_manager;
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index aae2a4019..cd51a31d7 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -5,16 +5,17 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
 
 namespace Tegra::Engines {
 
 KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                            MemoryManager& memory_manager)
-    : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
 
 KeplerMemory::~KeplerMemory() = default;
 
@@ -40,17 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
     ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
     ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
 
-    const GPUVAddr address = regs.dest.Address();
-    const auto dest_address =
-        memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
-    ASSERT_MSG(dest_address, "Invalid GPU address");
-
     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
-    // We do this before actually writing the new data because the destination address might contain
-    // a dirty surface that will have to be written back to memory.
-    Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
+    // We do this before actually writing the new data because the destination address might
+    // contain a dirty surface that will have to be written back to memory.
+    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
+    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
+    memory_manager.Write<u32>(address, data);
 
-    Memory::Write32(*dest_address, data);
     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 
     state.write_offset++;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 9181e9d80..78b6c3e45 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,12 +10,15 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 
 namespace Core {
 class System;
 }
 
+namespace Tegra {
+class MemoryManager;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -82,8 +85,8 @@ public:
 
 private:
     Core::System& system;
-    MemoryManager& memory_manager;
     VideoCore::RasterizerInterface& rasterizer;
+    MemoryManager& memory_manager;
 
     void ProcessData(u32 data);
 };
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 144e7fa82..74403eed4 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,11 +7,10 @@
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/core_timing.h"
-#include "core/memory.h"
 #include "video_core/debug_utils/debug_utils.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
 #include "video_core/textures/texture.h"
 
 namespace Tegra::Engines {
@@ -21,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
 
 Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                      MemoryManager& memory_manager)
-    : memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
-      macro_interpreter(*this) {
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
+                                                                                  *this} {
     InitializeRegisterDefaults();
 }
 
@@ -250,6 +249,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         ProcessQueryGet();
         break;
     }
+    case MAXWELL3D_REG_INDEX(sync_info): {
+        ProcessSyncPoint();
+        break;
+    }
     default:
         break;
     }
@@ -270,11 +273,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
 }
 
 void Maxwell3D::ProcessQueryGet() {
-    GPUVAddr sequence_address = regs.query.QueryAddress();
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
     // VAddr before writing.
-    const auto address = memory_manager.GpuToCpuAddress(sequence_address);
-    ASSERT_MSG(address, "Invalid GPU address");
 
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +310,7 @@ void Maxwell3D::ProcessQueryGet() {
             // Write the current query sequence to the sequence address.
             // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
             // query.
-            Memory::Write32(*address, sequence);
+            memory_manager.Write<u32>(sequence_address, sequence);
         } else {
             // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
             // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +319,7 @@ void Maxwell3D::ProcessQueryGet() {
             query_result.value = result;
             // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
             query_result.timestamp = system.CoreTiming().GetTicks();
-            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
         }
         dirty_flags.OnMemoryWrite();
         break;
@@ -329,6 +330,14 @@ void Maxwell3D::ProcessQueryGet() {
     }
 }
 
+void Maxwell3D::ProcessSyncPoint() {
+    const u32 sync_point = regs.sync_info.sync_point.Value();
+    const u32 increment = regs.sync_info.increment.Value();
+    const u32 cache_flush = regs.sync_info.unknown.Value();
+    LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
+              cache_flush);
+}
+
 void Maxwell3D::DrawArrays() {
     LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
               regs.vertex_buffer.count);
@@ -393,10 +402,12 @@ void Maxwell3D::ProcessCBData(u32 value) {
     // Don't allow writing past the end of the buffer.
     ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
 
-    const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
-    ASSERT_MSG(address, "Invalid GPU address");
+    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+
+    u8* ptr{memory_manager.GetPointer(address)};
+    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
+    memory_manager.Write<u32>(address, value);
 
-    Memory::Write32(*address, value);
     dirty_flags.OnMemoryWrite();
 
     // Increment the current buffer position.
@@ -404,14 +415,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
 }
 
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    const GPUVAddr tic_base_address = regs.tic.TICAddress();
-
-    const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
-    ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
+    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
 
     Texture::TICEntry tic_entry;
-    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+    memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
 
     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -429,14 +436,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
 }
 
 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
-    const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
-
-    const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
-    ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
+    const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
 
     Texture::TSCEntry tsc_entry;
-    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
     return tsc_entry;
 }
 
@@ -455,10 +458,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
 
-        const auto address = memory_manager.GpuToCpuAddress(current_texture);
-        ASSERT_MSG(address, "Invalid GPU address");
-
-        const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
+        const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
 
         Texture::FullTextureInfo tex_info{};
         // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -493,10 +493,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
 
     ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
 
-    const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
-    ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
-
-    const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
 
     Texture::FullTextureInfo tex_info{};
     tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7fbf1026e..321af3297 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -16,13 +16,16 @@
 #include "common/math_util.h"
 #include "video_core/gpu.h"
 #include "video_core/macro_interpreter.h"
-#include "video_core/memory_manager.h"
 #include "video_core/textures/texture.h"
 
 namespace Core {
 class System;
 }
 
+namespace Tegra {
+class MemoryManager;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -576,7 +579,17 @@ public:
                     u32 bind;
                 } macros;
 
-                INSERT_PADDING_WORDS(0x188);
+                INSERT_PADDING_WORDS(0x69);
+
+                struct {
+                    union {
+                        BitField<0, 16, u32> sync_point;
+                        BitField<16, 1, u32> unknown;
+                        BitField<20, 1, u32> increment;
+                    };
+                } sync_info;
+
+                INSERT_PADDING_WORDS(0x11E);
 
                 u32 tfb_enabled;
 
@@ -1093,7 +1106,6 @@ public:
     };
 
     State state{};
-    MemoryManager& memory_manager;
 
     struct DirtyFlags {
         std::bitset<8> color_buffer{0xFF};
@@ -1141,6 +1153,8 @@ private:
 
     VideoCore::RasterizerInterface& rasterizer;
 
+    MemoryManager& memory_manager;
+
     /// Start offsets of each macro in macro_memory
     std::unordered_map<u32, u32> macro_offsets;
 
@@ -1180,6 +1194,9 @@ private:
     /// Handles a write to the QUERY_GET register.
     void ProcessQueryGet();
 
+    /// Handles a write to the sync_info register.
+    void ProcessSyncPoint();
+
     /// Handles a write to the CB_DATA[i] register.
     void ProcessCBData(u32 value);
 
@@ -1195,6 +1212,7 @@ private:
                   "Field " #field_name " has invalid position")
 
 ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 9dfea5999..2426d0067 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,17 +5,18 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
 #include "video_core/textures/decoders.h"
 
 namespace Tegra::Engines {
 
 MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                        MemoryManager& memory_manager)
-    : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
 
 void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
     ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -42,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
     const GPUVAddr source = regs.src_address.Address();
     const GPUVAddr dest = regs.dst_address.Address();
 
-    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
-    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
-    ASSERT_MSG(source_cpu, "Invalid source GPU address");
-    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
-
     // TODO(Subv): Perform more research and implement all features of this engine.
     ASSERT(regs.exec.enable_swizzle == 0);
     ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -69,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
         // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
         // y_count).
         if (!regs.exec.enable_2d) {
-            Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
+            memory_manager.CopyBlock(dest, source, regs.x_count);
             return;
         }
 
@@ -78,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
         // rectangle. There is no need to manually flush/invalidate the regions because
         // CopyBlock does that for us.
         for (u32 line = 0; line < regs.y_count; ++line) {
-            const VAddr source_line = *source_cpu + line * regs.src_pitch;
-            const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
-            Memory::CopyBlock(dest_line, source_line, regs.x_count);
+            const GPUVAddr source_line = source + line * regs.src_pitch;
+            const GPUVAddr dest_line = dest + line * regs.dst_pitch;
+            memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
         }
         return;
     }
@@ -89,15 +85,28 @@ void MaxwellDMA::HandleCopy() {
 
     const std::size_t copy_size = regs.x_count * regs.y_count;
 
+    auto source_ptr{memory_manager.GetPointer(source)};
+    auto dst_ptr{memory_manager.GetPointer(dest)};
+
+    if (!source_ptr) {
+        LOG_ERROR(HW_GPU, "source_ptr is invalid");
+        return;
+    }
+
+    if (!dst_ptr) {
+        LOG_ERROR(HW_GPU, "dst_ptr is invalid");
+        return;
+    }
+
     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
-        Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
+        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
 
         // We have to invalidate the destination region to evict any outdated surfaces from the
         // cache. We do this before actually writing the new data because the destination address
         // might contain a dirty surface that will have to be written back to memory.
-        Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
+        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
     };
 
     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -110,8 +119,8 @@ void MaxwellDMA::HandleCopy() {
                            copy_size * src_bytes_per_pixel);
 
         Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
-                                  *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
+                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
+                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
                                   regs.src_params.pos_y);
     } else {
         ASSERT(regs.dst_params.size_z == 1);
@@ -124,7 +133,7 @@ void MaxwellDMA::HandleCopy() {
 
         // If the input is linear and the output is tiled, swizzle the input and copy it over.
         Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
+                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
     }
 }
 
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 34c369320..c6b649842 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -10,12 +10,15 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 
 namespace Core {
 class System;
 }
 
+namespace Tegra {
+class MemoryManager;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -139,13 +142,13 @@ public:
         };
     } regs{};
 
-    MemoryManager& memory_manager;
-
 private:
     Core::System& system;
 
     VideoCore::RasterizerInterface& rasterizer;
 
+    MemoryManager& memory_manager;
+
     /// Performs the copy from the source buffer to the destination buffer as configured in the
     /// registers.
     void HandleCopy();
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7f613370b..363e53be1 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1662,7 +1662,7 @@ private:
             INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
             INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
             INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
-            INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+            INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
             INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
             INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
             INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 08abf8ac9..4461083ff 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,6 +12,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
 
 namespace Tegra {
@@ -30,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
 
 GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
     auto& rasterizer{renderer.Rasterizer()};
-    memory_manager = std::make_unique<Tegra::MemoryManager>();
+    memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
     dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
     maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
     fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
@@ -274,7 +275,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
     const auto op =
         static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
     if (op == GpuSemaphoreOperation::WriteLong) {
-        auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
         struct Block {
             u32 sequence;
             u32 zeros = 0;
@@ -286,11 +286,10 @@ void GPU::ProcessSemaphoreTriggerMethod() {
         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
         // CoreTiming
         block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
-        Memory::WriteBlock(*address, &block, sizeof(block));
+        memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
+                                   sizeof(block));
     } else {
-        const auto address =
-            memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-        const u32 word = Memory::Read32(*address);
+        const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
         if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
             (op == GpuSemaphoreOperation::AcquireGequal &&
              static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -317,13 +316,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
 }
 
 void GPU::ProcessSemaphoreRelease() {
-    const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-    Memory::Write32(*address, regs.semaphore_release);
+    memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
 }
 
 void GPU::ProcessSemaphoreAcquire() {
-    const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-    const u32 word = Memory::Read32(*address);
+    const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
     const auto value = regs.semaphore_acquire;
     if (word != value) {
         regs.acquire_active = true;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 56a203275..de30ea354 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -9,7 +9,11 @@
 #include "common/common_types.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/dma_pusher.h"
-#include "video_core/memory_manager.h"
+
+using CacheAddr = std::uintptr_t;
+inline CacheAddr ToCacheAddr(const void* host_ptr) {
+    return reinterpret_cast<CacheAddr>(host_ptr);
+}
 
 namespace Core {
 class System;
@@ -119,6 +123,8 @@ enum class EngineID {
     MAXWELL_DMA_COPY_A = 0xB0B5,
 };
 
+class MemoryManager;
+
 class GPU {
 public:
     explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
@@ -171,11 +177,11 @@ public:
                     u32 address_high;
                     u32 address_low;
 
-                    GPUVAddr SmaphoreAddress() const {
+                    GPUVAddr SemaphoreAddress() const {
                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                                      address_low);
                     }
-                } smaphore_address;
+                } semaphore_address;
 
                 u32 semaphore_sequence;
                 u32 semaphore_trigger;
@@ -209,13 +215,13 @@ public:
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
 
 private:
     void ProcessBindMethod(const MethodCall& method_call);
@@ -239,9 +245,8 @@ protected:
 private:
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
 
-    /// Mapping of command subchannels to their bound engine ids.
+    /// Mapping of command subchannels to their bound engine ids
     std::array<EngineID, 8> bound_engines = {};
-
     /// 3D engine
     std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
     /// 2D engine
@@ -258,7 +263,7 @@ private:
     static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                 \
                   "Field " #field_name " has invalid position")
 
-ASSERT_REG_POSITION(smaphore_address, 0x4);
+ASSERT_REG_POSITION(semaphore_address, 0x4);
 ASSERT_REG_POSITION(semaphore_sequence, 0x6);
 ASSERT_REG_POSITION(semaphore_trigger, 0x7);
 ASSERT_REG_POSITION(reference_count, 0x14);
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index ad0a747e3..db507cf04 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -9,7 +9,7 @@
 namespace VideoCommon {
 
 GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
-    : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
+    : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {}
 
 GPUAsynch::~GPUAsynch() = default;
 
@@ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers(
     gpu_thread.SwapBuffers(std::move(framebuffer));
 }
 
-void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
     gpu_thread.FlushRegion(addr, size);
 }
 
-void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
     gpu_thread.InvalidateRegion(addr, size);
 }
 
-void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
     gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index e6a807aba..1dcc61a6c 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -26,9 +26,9 @@ public:
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
 
 private:
     GPUThread::ThreadManager gpu_thread;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 4c00b96c7..2cfc900ed 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -22,15 +22,15 @@ void GPUSynch::SwapBuffers(
     renderer.SwapBuffers(std::move(framebuffer));
 }
 
-void GPUSynch::FlushRegion(VAddr addr, u64 size) {
+void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
     renderer.Rasterizer().FlushRegion(addr, size);
 }
 
-void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
+void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
     renderer.Rasterizer().InvalidateRegion(addr, size);
 }
 
-void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
     renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 7d5a241ff..766b5631c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -21,9 +21,9 @@ public:
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c5bdd2a17..cc56cf467 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,8 +4,10 @@
 
 #include "common/assert.h"
 #include "common/microprofile.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/frontend/scope_acquire_window_context.h"
-#include "core/settings.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -13,38 +15,13 @@
 
 namespace VideoCommon::GPUThread {
 
-/// Executes a single GPU thread command
-static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
-                           Tegra::DmaPusher& dma_pusher) {
-    if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
-        dma_pusher.Push(std::move(submit_list->entries));
-        dma_pusher.DispatchCalls();
-    } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
-        renderer.SwapBuffers(data->framebuffer);
-    } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
-        renderer.Rasterizer().FlushRegion(data->addr, data->size);
-    } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
-        renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
-    } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
-        renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
-    } else {
-        UNREACHABLE();
-    }
-}
-
 /// Runs the GPU thread
 static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
                       SynchState& state) {
-
     MicroProfileOnThreadCreate("GpuThread");
 
-    auto WaitForWakeup = [&]() {
-        std::unique_lock<std::mutex> lock{state.signal_mutex};
-        state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
-    };
-
     // Wait for first GPU command before acquiring the window context
-    WaitForWakeup();
+    state.WaitForCommands();
 
     // If emulation was stopped during disk shader loading, abort before trying to acquire context
     if (!state.is_running) {
@@ -53,99 +30,91 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
 
     Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
 
+    CommandDataContainer next;
     while (state.is_running) {
-        if (!state.is_running) {
-            return;
-        }
-
-        {
-            // Thread has been woken up, so make the previous write queue the next read queue
-            std::lock_guard<std::mutex> lock{state.signal_mutex};
-            std::swap(state.push_queue, state.pop_queue);
+        state.WaitForCommands();
+        while (!state.queue.Empty()) {
+            state.queue.Pop(next);
+            if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
+                dma_pusher.Push(std::move(submit_list->entries));
+                dma_pusher.DispatchCalls();
+            } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
+                renderer.SwapBuffers(std::move(data->framebuffer));
+            } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
+                renderer.Rasterizer().FlushRegion(data->addr, data->size);
+            } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
+                renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+            } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+                return;
+            } else {
+                UNREACHABLE();
+            }
+            state.signaled_fence = next.fence;
+            state.TrySynchronize();
         }
-
-        // Execute all of the GPU commands
-        while (!state.pop_queue->empty()) {
-            ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
-            state.pop_queue->pop();
-        }
-
-        state.UpdateIdleState();
-
-        // Signal that the GPU thread has finished processing commands
-        if (state.is_idle) {
-            state.idle_condition.notify_one();
-        }
-
-        // Wait for CPU thread to send more GPU commands
-        WaitForWakeup();
     }
 }
 
-ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
-    : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
-                                                         std::ref(dma_pusher), std::ref(state)},
-      thread_id{thread.get_id()} {}
+ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+                             Tegra::DmaPusher& dma_pusher)
+    : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} {
+    synchronization_event = system.CoreTiming().RegisterEvent(
+        "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
+}
 
 ThreadManager::~ThreadManager() {
-    {
-        // Notify GPU thread that a shutdown is pending
-        std::lock_guard<std::mutex> lock{state.signal_mutex};
-        state.is_running = false;
-    }
-
-    state.signal_condition.notify_one();
+    // Notify GPU thread that a shutdown is pending
+    PushCommand(EndProcessingCommand());
     thread.join();
 }
 
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
-    if (entries.empty()) {
-        return;
-    }
-
-    PushCommand(SubmitListCommand(std::move(entries)), false, false);
+    const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
+    const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
+    system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
 }
 
 void ThreadManager::SwapBuffers(
     std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-    PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
+    PushCommand(SwapBuffersCommand(std::move(framebuffer)));
 }
 
-void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    // Block the CPU when using accurate emulation
-    PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
+void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+    PushCommand(FlushRegionCommand(addr, size));
 }
 
-void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
-    PushCommand(InvalidateRegionCommand(addr, size), true, true);
+void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+    if (state.queue.Empty()) {
+        // It's quicker to invalidate a single region on the CPU if the queue is already empty
+        system.Renderer().Rasterizer().InvalidateRegion(addr, size);
+    } else {
+        PushCommand(InvalidateRegionCommand(addr, size));
+    }
 }
 
-void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
     InvalidateRegion(addr, size);
 }
 
-void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
-    {
-        std::lock_guard<std::mutex> lock{state.signal_mutex};
-
-        if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
-            // Execute the command synchronously on the current thread
-            ExecuteCommand(&command_data, renderer, dma_pusher);
-            return;
-        }
+u64 ThreadManager::PushCommand(CommandData&& command_data) {
+    const u64 fence{++state.last_fence};
+    state.queue.Push(CommandDataContainer(std::move(command_data), fence));
+    state.SignalCommands();
+    return fence;
+}
 
-        // Push the command to the GPU thread
-        state.UpdateIdleState();
-        state.push_queue->emplace(command_data);
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+void SynchState::WaitForSynchronization(u64 fence) {
+    if (signaled_fence >= fence) {
+        return;
     }
 
-    // Signal the GPU thread that commands are pending
-    state.signal_condition.notify_one();
-
-    if (wait_for_idle) {
-        // Wait for the GPU to be idle (all commands to be executed)
-        std::unique_lock<std::mutex> lock{state.idle_mutex};
-        state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
+    // Wait for the GPU to be idle (all commands to be executed)
+    {
+        MICROPROFILE_SCOPE(GPU_wait);
+        std::unique_lock<std::mutex> lock{synchronization_mutex};
+        synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
     }
 }
 
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index edb148b14..62bcea5bb 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -4,26 +4,33 @@
 
 #pragma once
 
-#include <array>
 #include <atomic>
 #include <condition_variable>
-#include <memory>
 #include <mutex>
 #include <optional>
 #include <thread>
 #include <variant>
 
+#include "common/threadsafe_queue.h"
+#include "video_core/gpu.h"
+
 namespace Tegra {
 struct FramebufferConfig;
 class DmaPusher;
 } // namespace Tegra
 
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
+namespace Core {
+class System;
+namespace Timing {
+struct EventType;
+} // namespace Timing
+} // namespace Core
 
 namespace VideoCommon::GPUThread {
 
+/// Command to signal to the GPU thread that processing has ended
+struct EndProcessingCommand final {};
+
 /// Command to signal to the GPU thread that a command list is ready for processing
 struct SubmitListCommand final {
     explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
@@ -36,65 +43,103 @@ struct SwapBuffersCommand final {
     explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
         : framebuffer{std::move(framebuffer)} {}
 
-    std::optional<const Tegra::FramebufferConfig> framebuffer;
+    std::optional<Tegra::FramebufferConfig> framebuffer;
 };
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
         : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
-using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+using CommandData =
+    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+struct CommandDataContainer {
+    CommandDataContainer() = default;
+
+    CommandDataContainer(CommandData&& data, u64 next_fence)
+        : data{std::move(data)}, fence{next_fence} {}
+
+    CommandDataContainer& operator=(const CommandDataContainer& t) {
+        data = t.data; // t is const, so this is always a copy; std::move could never move here
+        fence = t.fence;
+        return *this;
+    }
+
+    CommandData data;
+    u64 fence{};
+};
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    std::atomic<bool> is_running{true};
-    std::atomic<bool> is_idle{true};
-    std::condition_variable signal_condition;
-    std::mutex signal_mutex;
-    std::condition_variable idle_condition;
-    std::mutex idle_mutex;
-
-    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
-    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
-    // empty. This allows for efficient thread-safe access, as it does not require any copies.
-
-    using CommandQueue = std::queue<CommandData>;
-    std::array<CommandQueue, 2> command_queues;
-    CommandQueue* push_queue{&command_queues[0]};
-    CommandQueue* pop_queue{&command_queues[1]};
-
-    void UpdateIdleState() {
-        std::lock_guard<std::mutex> lock{idle_mutex};
-        is_idle = command_queues[0].empty() && command_queues[1].empty();
+    std::atomic_bool is_running{true};
+    std::atomic_int queued_frame_count{};
+    std::mutex synchronization_mutex;
+    std::mutex commands_mutex;
+    std::condition_variable commands_condition;
+    std::condition_variable synchronization_condition;
+
+    /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
+    /// synchronized. This is entirely empirical.
+    bool IsSynchronized() const {
+        constexpr std::size_t max_queue_gap{5};
+        return queue.Size() <= max_queue_gap;
+    }
+
+    void TrySynchronize() {
+        if (IsSynchronized()) { // Queue backlog is within max_queue_gap
+            std::lock_guard<std::mutex> lock{synchronization_mutex};
+            synchronization_condition.notify_one(); // Waiter re-checks its fence predicate
+        }
+    }
+
+    void WaitForSynchronization(u64 fence);
+
+    /// Wakes the GPU thread if work is queued; locks so notify can't race the waiter's check.
+    void SignalCommands() {
+        if (!queue.Empty()) {
+            std::lock_guard<std::mutex> lock{commands_mutex};
+            commands_condition.notify_one();
+        }
+    }
+
+    void WaitForCommands() { // Blocks the calling thread until the queue is non-empty
+        std::unique_lock lock{commands_mutex};
+        commands_condition.wait(lock, [this] { return !queue.Empty(); });
+    }
+
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    CommandQueue queue;
+    u64 last_fence{};
+    std::atomic<u64> signaled_fence{};
 };
 
 /// Class used to manage the GPU thread
 class ThreadManager final {
 public:
-    explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+    explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+                           Tegra::DmaPusher& dma_pusher);
     ~ThreadManager();
 
     /// Push GPU command entries to be processed
@@ -105,27 +150,22 @@ public:
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(VAddr addr, u64 size);
+    void FlushRegion(CacheAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(VAddr addr, u64 size);
+    void InvalidateRegion(CacheAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
 
 private:
     /// Pushes a command to be executed by the GPU thread
-    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
-
-    /// Returns true if this is called by the GPU thread
-    bool IsGpuThread() const {
-        return std::this_thread::get_id() == thread_id;
-    }
+    u64 PushCommand(CommandData&& command_data);
 
 private:
     SynchState state;
-    VideoCore::RendererBase& renderer;
-    Tegra::DmaPusher& dma_pusher;
+    Core::System& system;
+    Core::Timing::EventType* synchronization_event{};
     std::thread thread;
     std::thread::id thread_id;
 };
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 64f75db43..524d9ea5a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -223,27 +223,21 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
 }
 
 u32 MacroInterpreter::FetchParameter() {
-    ASSERT(next_parameter_index < parameters.size());
-    return parameters[next_parameter_index++];
+    return parameters.at(next_parameter_index++);
 }
 
 u32 MacroInterpreter::GetRegister(u32 register_id) const {
-    // Register 0 is supposed to always return 0.
-    if (register_id == 0)
-        return 0;
-
-    ASSERT(register_id < registers.size());
-    return registers[register_id];
+    return registers.at(register_id);
 }
 
 void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
-    // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
-    // register.
-    if (register_id == 0)
+    // Register 0 is hardwired as the zero register.
+    // Ensure no writes to it actually occur.
+    if (register_id == 0) {
         return;
+    }
 
-    ASSERT(register_id < registers.size());
-    registers[register_id] = value;
+    registers.at(register_id) = value;
 }
 
 void MacroInterpreter::SetMethodAddress(u32 address) {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 54abe5298..0f4e820aa 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,181 +5,528 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/memory.h"
 #include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
 
 namespace Tegra {
 
-MemoryManager::MemoryManager() {
-    // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
-    // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
-    // Undertale using 0 for a render target.
-    PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
+MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
+    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
+    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
+              Common::PageType::Unmapped);
+    page_table.Resize(address_space_width);
+
+    // Initialize the map with a single free region covering the entire managed space.
+    VirtualMemoryArea initial_vma;
+    initial_vma.size = address_space_end;
+    vma_map.emplace(initial_vma.base, initial_vma);
+
+    UpdatePageTableForVMA(initial_vma);
 }
 
 GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
-    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
 
-    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+    AllocateMemory(gpu_addr, 0, aligned_size);
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(*gpu_addr + offset)};
+    return gpu_addr;
+}
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
 
-        slot = static_cast<u64>(PageStatus::Allocated);
-    }
+    AllocateMemory(gpu_addr, 0, aligned_size);
 
-    return *gpu_addr;
+    return gpu_addr;
 }
 
-GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
 
-        slot = static_cast<u64>(PageStatus::Allocated);
-    }
+    return gpu_addr;
+}
+
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
+    ASSERT((gpu_addr & page_mask) == 0);
+
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+
+    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
 
     return gpu_addr;
 }
 
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
-    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
+GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
+    ASSERT((gpu_addr & page_mask) == 0);
+
+    const u64 aligned_size{Common::AlignUp(size, page_size)};
+    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
+
+    rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
+    UnmapRange(gpu_addr, aligned_size);
 
-    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+    return gpu_addr;
+}
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(*gpu_addr + offset)};
+GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
+    // Find the first unmapped VMA with `size` bytes available at or after region_start.
+    const VMAHandle vma_handle{
+        std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
+            if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
+                return false;
+            }
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+            const GPUVAddr end{vma.second.base + vma.second.size};
+            return end > region_start && end - std::max(region_start, vma.second.base) >= size;
+        })};
 
-        slot = cpu_addr + offset;
+    if (vma_handle == vma_map.end()) {
+        return {};
     }
 
-    const MappedRegion region{cpu_addr, *gpu_addr, size};
-    mapped_regions.push_back(region);
+    return std::max(region_start, vma_handle->second.base);
+}
 
-    return *gpu_addr;
+bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
+    return (addr >> page_bits) < page_table.pointers.size();
 }
 
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & PAGE_MASK) == 0);
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
 
-    if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
-        // Page has been already mapped. In this case, we must find a new area of memory to use that
-        // is different than the specified one. Super Mario Odyssey hits this scenario when changing
-        // areas, but we do not want to overwrite the old pages.
-        // TODO(bunnei): We need to write a hardware test to confirm this behavior.
+    const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
+    if (cpu_addr) {
+        return cpu_addr + (addr & page_mask);
+    }
+
+    return {};
+}
 
-        LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
+template <typename T>
+T MemoryManager::Read(GPUVAddr addr) const {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
 
-        const std::optional<GPUVAddr> new_gpu_addr{
-            FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
+    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        // NOTE: Avoid adding any extra logic to this fast-path block
+        T value;
+        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+        return value;
+    }
 
-        ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
+    switch (page_table.attributes[addr >> page_bits]) {
+    case Common::PageType::Unmapped:
+        LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
+        return 0;
+    case Common::PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+        break;
+    default:
+        UNREACHABLE();
+    }
+    return {};
+}
 
-        gpu_addr = *new_gpu_addr;
+template <typename T>
+void MemoryManager::Write(GPUVAddr addr, T data) {
+    if (!IsAddressValid(addr)) {
+        return;
     }
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer) {
+        // NOTE: Avoid adding any extra logic to this fast-path block
+        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+        return;
+    }
 
-        ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
+    switch (page_table.attributes[addr >> page_bits]) {
+    case Common::PageType::Unmapped:
+        LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
+                  static_cast<u32>(data), addr);
+        return;
+    case Common::PageType::Memory:
+        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
 
-        slot = cpu_addr + offset;
+template u8 MemoryManager::Read<u8>(GPUVAddr addr) const;
+template u16 MemoryManager::Read<u16>(GPUVAddr addr) const;
+template u32 MemoryManager::Read<u32>(GPUVAddr addr) const;
+template u64 MemoryManager::Read<u64>(GPUVAddr addr) const;
+template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
+template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
+template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
+template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
+
+u8* MemoryManager::GetPointer(GPUVAddr addr) {
+    if (!IsAddressValid(addr)) {
+        return {};
     }
 
-    const MappedRegion region{cpu_addr, gpu_addr, size};
-    mapped_regions.push_back(region);
+    u8* const page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer != nullptr) {
+        return page_pointer + (addr & page_mask);
+    }
 
-    return gpu_addr;
+    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
+    return {};
 }
 
-GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
-    ASSERT((gpu_addr & PAGE_MASK) == 0);
+const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
+    if (!IsAddressValid(addr)) {
+        return {};
+    }
 
-    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot{PageSlot(gpu_addr + offset)};
+    const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
+    if (page_pointer != nullptr) {
+        return page_pointer + (addr & page_mask);
+    }
 
-        ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
-               slot != static_cast<u64>(PageStatus::Unmapped));
+    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
+    return {};
+}
 
-        slot = static_cast<u64>(PageStatus::Unmapped);
-    }
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
+    std::size_t remaining_size{size};
+    std::size_t page_index{src_addr >> page_bits};
+    std::size_t page_offset{src_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+        switch (page_table.attributes[page_index]) {
+        case Common::PageType::Memory: {
+            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+            std::memcpy(dest_buffer, src_ptr, copy_amount);
+            break;
+        }
+        default:
+            UNREACHABLE();
+        }
 
-    // Delete the region mappings that are contained within the unmapped region
-    mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(),
-                                        [&](const MappedRegion& region) {
-                                            return region.gpu_addr <= gpu_addr &&
-                                                   region.gpu_addr + region.size < gpu_addr + size;
-                                        }),
-                         mapped_regions.end());
-    return gpu_addr;
+        page_index++;
+        page_offset = 0;
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+        remaining_size -= copy_amount;
+    }
 }
 
-GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
-    for (const auto& region : mapped_regions) {
-        const GPUVAddr region_end{region.gpu_addr + region.size};
-        if (region_start >= region.gpu_addr && region_start < region_end) {
-            return region_end;
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+    std::size_t remaining_size{size};
+    std::size_t page_index{dest_addr >> page_bits};
+    std::size_t page_offset{dest_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+        switch (page_table.attributes[page_index]) {
+        case Common::PageType::Memory: {
+            u8* dest_ptr{page_table.pointers[page_index] + page_offset};
+            rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
+            std::memcpy(dest_ptr, src_buffer, copy_amount);
+            break;
         }
+        default:
+            UNREACHABLE();
+        }
+
+        page_index++;
+        page_offset = 0;
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+        remaining_size -= copy_amount;
     }
-    return {};
 }
 
-std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
-                                                     PageStatus status) {
-    GPUVAddr gpu_addr{region_start};
-    u64 free_space{};
-    align = (align + PAGE_MASK) & ~PAGE_MASK;
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+    std::size_t remaining_size{size};
+    std::size_t page_index{src_addr >> page_bits};
+    std::size_t page_offset{src_addr & page_mask};
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+        switch (page_table.attributes[page_index]) {
+        case Common::PageType::Memory: {
+            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+            WriteBlock(dest_addr, src_ptr, copy_amount);
+            break;
+        }
+        default:
+            UNREACHABLE();
+        }
 
-    while (gpu_addr + free_space < MAX_ADDRESS) {
-        if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
-            free_space += PAGE_SIZE;
-            if (free_space >= size) {
-                return gpu_addr;
-            }
-        } else {
-            gpu_addr += free_space + PAGE_SIZE;
-            free_space = 0;
-            gpu_addr = Common::AlignUp(gpu_addr, align);
+        page_index++;
+        page_offset = 0;
+        dest_addr += static_cast<VAddr>(copy_amount);
+        src_addr += static_cast<VAddr>(copy_amount);
+        remaining_size -= copy_amount;
+    }
+}
+
+void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+                             VAddr backing_addr) {
+    LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
+              (base + size) * page_size);
+
+    const VAddr end{base + size}; // `base` and `size` are in pages, not bytes
+    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+               base * page_size);
+
+    std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
+
+    if (memory == nullptr) {
+        std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+        std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
+                  backing_addr);
+    } else {
+        while (base != end) {
+            page_table.pointers[base] = memory;
+            page_table.backing_addr[base] = backing_addr;
+
+            base += 1;
+            memory += page_size;
+            backing_addr += page_size;
         }
     }
+}
 
-    return {};
+void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+    MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
 }
 
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
-    const VAddr base_addr{PageSlot(gpu_addr)};
+void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+    MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
+}
 
-    if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
-        base_addr == static_cast<u64>(PageStatus::Unmapped) ||
-        base_addr == static_cast<u64>(PageStatus::Reserved)) {
+bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
+    ASSERT(base + size == next.base);
+    if (type != next.type) {
+        return {};
+    }
+    if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
+        return {};
+    }
+    if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
         return {};
     }
+    return true;
+}
+
+MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
+    if (target >= address_space_end) {
+        return vma_map.end();
+    } else {
+        return std::prev(vma_map.upper_bound(target));
+    }
+}
+
+MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    vma.type = VirtualMemoryArea::Type::Allocated;
+    vma.backing_addr = 0;
+    vma.backing_memory = {};
+    UpdatePageTableForVMA(vma);
+
+    return MergeAdjacent(vma_handle);
+}
+
+MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
+                                                       u64 size) {
+
+    // This is the appropriately sized VMA that will turn into our allocation.
+    VMAIter vma_handle{CarveVMA(target, size)};
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    ASSERT(vma.size == size);
+
+    vma.offset = offset;
+
+    return Allocate(vma_handle);
+}
+
+MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
+                                                         VAddr backing_addr) {
+    // This is the appropriately sized VMA that will turn into our allocation.
+    VMAIter vma_handle{CarveVMA(target, size)};
+    VirtualMemoryArea& vma{vma_handle->second};
+
+    ASSERT(vma.size == size);
+
+    vma.type = VirtualMemoryArea::Type::Mapped;
+    vma.backing_memory = memory;
+    vma.backing_addr = backing_addr;
+    UpdatePageTableForVMA(vma);
+
+    return MergeAdjacent(vma_handle);
+}
+
+void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
+    VMAIter vma{CarveVMARange(target, size)};
+    const VAddr target_end{target + size};
+    const VMAIter end{vma_map.end()};
+
+    // The comparison against the end of the range must be done using addresses since VMAs can be
+    // merged during this process, causing invalidation of the iterators.
+    while (vma != end && vma->second.base < target_end) {
+        // Unmapped ranges return to allocated state and can be reused
+        // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
+        vma = std::next(Allocate(vma));
+    }
+
+    ASSERT(FindVMA(target)->second.size >= size);
+}
 
-    return base_addr + (gpu_addr & PAGE_MASK);
+MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
+    // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
+    // non-const access to its container.
+    return vma_map.erase(iter, iter); // Erases an empty range of elements
 }
 
-std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
-    std::vector<GPUVAddr> results;
-    for (const auto& region : mapped_regions) {
-        if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
-            const u64 offset{cpu_addr - region.cpu_addr};
-            results.push_back(region.gpu_addr + offset);
+MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
+
+    VMAIter vma_handle{StripIterConstness(FindVMA(base))};
+    if (vma_handle == vma_map.end()) {
+        // Target address is outside the managed range; report it with the end() sentinel
+        return vma_map.end();
+    }
+
+    const VirtualMemoryArea& vma{vma_handle->second};
+    if (vma.type == VirtualMemoryArea::Type::Mapped) {
+        // Region is already mapped; the existing VMA is returned without being carved
+        return vma_handle;
+    }
+
+    const VAddr start_in_vma{base - vma.base};
+    const VAddr end_in_vma{start_in_vma + size};
+
+    ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
+               vma.size, end_in_vma);
+
+    if (end_in_vma < vma.size) {
+        // Split VMA at the end of the allocated region
+        SplitVMA(vma_handle, end_in_vma);
+    }
+    if (start_in_vma != 0) {
+        // Split VMA at the start of the allocated region
+        vma_handle = SplitVMA(vma_handle, start_in_vma);
+    }
+
+    return vma_handle;
+}
+
+MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
+    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+    ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
+
+    const VAddr target_end{target + size};
+    ASSERT(target_end >= target);
+    ASSERT(size > 0);
+
+    VMAIter begin_vma{StripIterConstness(FindVMA(target))};
+    const VMAIter i_end{vma_map.lower_bound(target_end)};
+    if (std::any_of(begin_vma, i_end, [](const auto& entry) {
+            return entry.second.type == VirtualMemoryArea::Type::Unmapped;
+        })) {
+        return vma_map.end(); // Nothing carved: the range overlaps an unmapped VMA
+    }
+
+    if (target != begin_vma->second.base) {
+        begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
+    }
+
+    VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
+    if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
+        end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
+    }
+
+    return begin_vma;
+}
+
+MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
+    VirtualMemoryArea& old_vma{vma_handle->second};
+    VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
+
+    // No-op splits (at a VMA boundary) are rejected for now since they probably indicate a bug.
+    ASSERT(offset_in_vma < old_vma.size);
+    ASSERT(offset_in_vma > 0);
+
+    old_vma.size = offset_in_vma;
+    new_vma.base += offset_in_vma;
+    new_vma.size -= offset_in_vma;
+
+    switch (new_vma.type) {
+    case VirtualMemoryArea::Type::Unmapped:
+        break;
+    case VirtualMemoryArea::Type::Allocated:
+        new_vma.offset += offset_in_vma;
+        break;
+    case VirtualMemoryArea::Type::Mapped:
+        new_vma.backing_memory += offset_in_vma;
+        new_vma.backing_addr += offset_in_vma; // Keep the CPU address in step with the pointer
+        break;
+    }
+
+    ASSERT(old_vma.CanBeMergedWith(new_vma));
+
+    return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
+}
+
+MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
+    const VMAIter next_vma{std::next(iter)};
+    if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
+        iter->second.size += next_vma->second.size;
+        vma_map.erase(next_vma);
+    }
+
+    if (iter != vma_map.begin()) {
+        VMAIter prev_vma{std::prev(iter)};
+        if (prev_vma->second.CanBeMergedWith(iter->second)) {
+            prev_vma->second.size += iter->second.size;
+            vma_map.erase(iter);
+            iter = prev_vma;
         }
     }
-    return results;
+
+    return iter;
 }
 
-VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
-    auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
-    if (!block) {
-        block = std::make_unique<PageBlock>();
-        block->fill(static_cast<VAddr>(PageStatus::Unmapped));
+void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
+    switch (vma.type) {
+    case VirtualMemoryArea::Type::Unmapped:
+        UnmapRegion(vma.base, vma.size);
+        break;
+    case VirtualMemoryArea::Type::Allocated:
+        MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
+        break;
+    case VirtualMemoryArea::Type::Mapped:
+        MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
+        break;
     }
-    return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
 }
 
 } // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fb03497ca..647cbf93a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -1,67 +1,154 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2018 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
 
-#include <array>
-#include <memory>
+#include <map>
 #include <optional>
-#include <vector>
 
 #include "common/common_types.h"
+#include "common/page_table.h"
+
+namespace VideoCore {
+class RasterizerInterface;
+}
 
 namespace Tegra {
 
-/// Virtual addresses in the GPU's memory map are 64 bit.
-using GPUVAddr = u64;
+/**
+ * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
+ * with homogeneous attributes across its extents. In this particular implementation each VMA is
+ * also backed by a single host memory allocation.
+ */
+struct VirtualMemoryArea {
+    enum class Type : u8 {
+        Unmapped,
+        Allocated,
+        Mapped,
+    };
+
+    /// Virtual base address of the region.
+    GPUVAddr base{};
+    /// Size of the region.
+    u64 size{};
+    /// Memory area mapping type.
+    Type type{Type::Unmapped};
+    /// CPU memory mapped address corresponding to this memory area.
+    VAddr backing_addr{};
+    /// Offset into the backing_memory the mapping starts from.
+    std::size_t offset{};
+    /// Pointer backing this VMA.
+    u8* backing_memory{};
+
+    /// Tests if this area can be merged to the right with `next`.
+    bool CanBeMergedWith(const VirtualMemoryArea& next) const;
+};
 
 class MemoryManager final {
 public:
-    MemoryManager();
+    MemoryManager(VideoCore::RasterizerInterface& rasterizer);
 
     GPUVAddr AllocateSpace(u64 size, u64 align);
-    GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
+    GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
     GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
-    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
-    GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
-    GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
-    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
-    std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
+    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
+    GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
+    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+
+    template <typename T>
+    T Read(GPUVAddr addr) const;
+
+    template <typename T>
+    void Write(GPUVAddr addr, T data);
 
-    static constexpr u64 PAGE_BITS = 16;
-    static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
-    static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
+    u8* GetPointer(GPUVAddr addr);
+    const u8* GetPointer(GPUVAddr addr) const;
+
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
 private:
-    enum class PageStatus : u64 {
-        Unmapped = 0xFFFFFFFFFFFFFFFFULL,
-        Allocated = 0xFFFFFFFFFFFFFFFEULL,
-        Reserved = 0xFFFFFFFFFFFFFFFDULL,
-    };
+    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
+    using VMAHandle = VMAMap::const_iterator;
+    using VMAIter = VMAMap::iterator;
 
-    std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
-                                          PageStatus status);
-    VAddr& PageSlot(GPUVAddr gpu_addr);
-
-    static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
-    static constexpr u64 PAGE_TABLE_BITS{10};
-    static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
-    static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
-    static constexpr u64 PAGE_BLOCK_BITS{14};
-    static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS};
-    static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1};
-
-    using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
-    std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
-
-    struct MappedRegion {
-        VAddr cpu_addr;
-        GPUVAddr gpu_addr;
-        u64 size;
-    };
+    bool IsAddressValid(GPUVAddr addr) const;
+    void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+                  VAddr backing_addr = 0);
+    void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
+    void UnmapRegion(GPUVAddr base, u64 size);
+
+    /// Finds the VMA that contains the given address, or returns `vma_map.end()`.
+    VMAHandle FindVMA(GPUVAddr target) const;
+
+    VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
+
+    /**
+     * Maps an unmanaged host memory pointer at a given address.
+     *
+     * @param target The guest address to start the mapping at.
+     * @param memory The memory to be mapped.
+     * @param size Size of the mapping.
+     * @param backing_addr CPU virtual address that the mapped memory corresponds to.
+     */
+    VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
+
+    /// Unmaps a range of addresses, splitting VMAs as necessary.
+    void UnmapRange(GPUVAddr target, u64 size);
+
+    /// Converts a VMAHandle to a mutable VMAIter.
+    VMAIter StripIterConstness(const VMAHandle& iter);
+
+    /// Marks the specified VMA as allocated.
+    VMAIter Allocate(VMAIter vma);
+
+    /**
+     * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
+     * the appropriate error checking.
+     */
+    VMAIter CarveVMA(GPUVAddr base, u64 size);
+
+    /**
+     * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
+     * end of the range.
+     */
+    VMAIter CarveVMARange(GPUVAddr base, u64 size);
+
+    /**
+     * Splits a VMA in two, at the specified offset.
+     * @returns the right side of the split, with the original iterator becoming the left side.
+     */
+    VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
+
+    /**
+     * Checks for and merges the specified VMA with adjacent ones if possible.
+     * @returns the merged VMA or the original if no merging was possible.
+     */
+    VMAIter MergeAdjacent(VMAIter vma);
+
+    /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
+    void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+
+    /// Finds a free (unmapped) region of the specified size at or after the specified address.
+    GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
+
+private:
+    static constexpr u64 page_bits{16};
+    static constexpr u64 page_size{1 << page_bits};
+    static constexpr u64 page_mask{page_size - 1};
+
+    /// Address space in bits, this is fairly arbitrary but sufficiently large.
+    static constexpr u32 address_space_width{39};
+    /// Start address for mapping, this is fairly arbitrary but must be non-zero.
+    static constexpr GPUVAddr address_space_base{0x100000};
+    /// End of address space, based on address space in bits.
+    static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
 
-    std::vector<MappedRegion> mapped_regions;
+    Common::PageTable page_table{page_bits};
+    VMAMap vma_map;
+    VideoCore::RasterizerInterface& rasterizer;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 9692ce143..3e91cbc83 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
 #include <cstring>
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "core/memory.h"
 #include "video_core/morton.h"
 #include "video_core/surface.h"
 #include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
 using Surface::GetBytesPerPixel;
 using Surface::PixelFormat;
 
-using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, VAddr);
+using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
 using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
 
 template <bool morton_to_linear, PixelFormat format>
 static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
-                       u32 tile_width_spacing, u8* buffer, VAddr addr) {
+                       u32 tile_width_spacing, u8* buffer, u8* addr) {
     constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
 
     // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,10 +33,10 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
                                          stride, height, depth, block_height, block_depth,
                                          tile_width_spacing);
     } else {
-        Tegra::Texture::CopySwizzledData(
-            (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
-            depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false,
-            block_height, block_depth, tile_width_spacing);
+        Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
+                                         (height + tile_size_y - 1) / tile_size_y, depth,
+                                         bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
+                                         block_height, block_depth, tile_width_spacing);
     }
 }
 
@@ -282,7 +281,7 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
 
 void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
-                   u8* buffer, VAddr addr) {
+                   u8* buffer, u8* addr) {
     GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
                                      tile_width_spacing, buffer, addr);
 }
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index b565204b5..ee5b45555 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,7 +13,7 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
 
 void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
                    u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
-                   u8* buffer, VAddr addr);
+                   u8* buffer, u8* addr);
 
 void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
                          u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index a7bcf26fb..291772186 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <mutex>
 #include <set>
 #include <unordered_map>
 
@@ -12,14 +13,26 @@
 
 #include "common/common_types.h"
 #include "core/settings.h"
+#include "video_core/gpu.h"
 #include "video_core/rasterizer_interface.h"
 
 class RasterizerCacheObject {
 public:
+    explicit RasterizerCacheObject(const u8* host_ptr)
+        : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
+
     virtual ~RasterizerCacheObject();
 
+    CacheAddr GetCacheAddr() const {
+        return cache_addr;
+    }
+
+    const u8* GetHostPtr() const {
+        return host_ptr;
+    }
+
     /// Gets the address of the shader in guest memory, required for cache management
-    virtual VAddr GetAddr() const = 0;
+    virtual VAddr GetCpuAddr() const = 0;
 
     /// Gets the size of the shader in guest memory, required for cache management
     virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
     bool is_registered{};      ///< Whether the object is currently registered with the cache
     bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
     u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
+    const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
+    CacheAddr cache_addr{};    ///< Cache address, distinct from the emulated virtual address space
 };
 
 template <class T>
@@ -68,7 +83,9 @@ public:
     explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
 
     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
+    void FlushRegion(CacheAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
         for (auto& object : objects) {
             FlushObject(object);
@@ -76,7 +93,9 @@ public:
     }
 
     /// Mark the specified region as being invalidated
-    void InvalidateRegion(VAddr addr, u64 size) {
+    void InvalidateRegion(CacheAddr addr, u64 size) {
+        std::lock_guard lock{mutex};
+
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
         for (auto& object : objects) {
             if (!object->IsRegistered()) {
@@ -89,48 +108,60 @@ public:
 
     /// Invalidates everything in the cache
     void InvalidateAll() {
+        std::lock_guard lock{mutex};
+
         while (interval_cache.begin() != interval_cache.end()) {
             Unregister(*interval_cache.begin()->second.begin());
         }
     }
 
 protected:
-    /// Tries to get an object from the cache with the specified address
-    T TryGet(VAddr addr) const {
+    /// Tries to get an object from the cache with the specified cache address
+    T TryGet(CacheAddr addr) const {
         const auto iter = map_cache.find(addr);
         if (iter != map_cache.end())
             return iter->second;
         return nullptr;
     }
 
+    T TryGet(const void* addr) const {
+        const auto iter = map_cache.find(ToCacheAddr(addr));
+        if (iter != map_cache.end())
+            return iter->second;
+        return nullptr;
+    }
+
     /// Register an object into the cache
-    void Register(const T& object) {
+    virtual void Register(const T& object) {
+        std::lock_guard lock{mutex};
+
         object->SetIsRegistered(true);
         interval_cache.add({GetInterval(object), ObjectSet{object}});
-        map_cache.insert({object->GetAddr(), object});
-        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
+        map_cache.insert({object->GetCacheAddr(), object});
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
     }
 
     /// Unregisters an object from the cache
-    void Unregister(const T& object) {
-        object->SetIsRegistered(false);
-        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
-        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
-        if (Settings::values.use_accurate_gpu_emulation) {
-            FlushObject(object);
-        }
+    virtual void Unregister(const T& object) {
+        std::lock_guard lock{mutex};
 
+        object->SetIsRegistered(false);
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
         interval_cache.subtract({GetInterval(object), ObjectSet{object}});
-        map_cache.erase(object->GetAddr());
+        map_cache.erase(object->GetCacheAddr());
     }
 
     /// Returns a ticks counter used for tracking when cached objects were last modified
     u64 GetModifiedTicks() {
+        std::lock_guard lock{mutex};
+
         return ++modified_ticks;
     }
 
     /// Flushes the specified object, updating appropriate cache state as needed
     void FlushObject(const T& object) {
+        std::lock_guard lock{mutex};
+
         if (!object->IsDirty()) {
             return;
         }
@@ -140,7 +171,7 @@ protected:
 
 private:
     /// Returns a list of cached objects from the specified memory region, ordered by access time
-    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
+    std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
         if (size == 0) {
             return {};
         }
@@ -164,17 +195,18 @@ private:
     }
 
     using ObjectSet = std::set<T>;
-    using ObjectCache = std::unordered_map<VAddr, T>;
-    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
+    using ObjectCache = std::unordered_map<CacheAddr, T>;
+    using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
     using ObjectInterval = typename IntervalCache::interval_type;
 
     static auto GetInterval(const T& object) {
-        return ObjectInterval::right_open(object->GetAddr(),
-                                          object->GetAddr() + object->GetSizeInBytes());
+        return ObjectInterval::right_open(object->GetCacheAddr(),
+                                          object->GetCacheAddr() + object->GetSizeInBytes());
     }
 
     ObjectCache map_cache;
     IntervalCache interval_cache; ///< Cache of objects
     u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
     VideoCore::RasterizerInterface& rasterizer;
+    std::recursive_mutex mutex;
 };
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6a1dc9cf6..d7b86df38 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,7 +9,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 
 namespace VideoCore {
 
@@ -35,14 +34,14 @@ public:
     virtual void FlushAll() = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
 
     /// Attempt to use a faster method to perform a surface copy
     virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
@@ -63,7 +62,7 @@ public:
     }
 
     /// Increase/decrease the number of object in pages touching the specified region
-    virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
+    virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
 
     /// Initialize disk cached resources for the game being emulated
     virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b3062e5ba..7989ec11b 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,30 +7,33 @@
 
 #include "common/alignment.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 
 namespace OpenGL {
 
+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+                                     std::size_t alignment, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+      alignment{alignment} {}
+
 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
     : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
 
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
-                                      std::size_t alignment, bool cache) {
+GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
+                                      bool cache) {
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT_MSG(cpu_addr, "Invalid GPU address");
 
     // Cache management is a big overhead, so only cache entries with a given size.
     // TODO: Figure out which size is the best for given games.
     cache &= size >= 2048;
 
+    const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
     if (cache) {
-        auto entry = TryGet(*cpu_addr);
+        auto entry = TryGet(host_ptr);
         if (entry) {
-            if (entry->size >= size && entry->alignment == alignment) {
-                return entry->offset;
+            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
+                return entry->GetOffset();
             }
             Unregister(entry);
         }
@@ -39,17 +42,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
     AlignBuffer(alignment);
     const GLintptr uploaded_offset = buffer_offset;
 
-    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+    if (!host_ptr) {
+        return uploaded_offset;
+    }
 
+    std::memcpy(buffer_ptr, host_ptr, size);
     buffer_ptr += size;
     buffer_offset += size;
 
     if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>();
-        entry->offset = uploaded_offset;
-        entry->size = size;
-        entry->alignment = alignment;
-        entry->addr = *cpu_addr;
+        auto entry = std::make_shared<CachedBufferEntry>(
+            *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
         Register(entry);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..fc33aa433 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
 
 class RasterizerOpenGL;
 
-struct CachedBufferEntry final : public RasterizerCacheObject {
-    VAddr GetAddr() const override {
-        return addr;
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+                               std::size_t alignment, u8* host_ptr);
+
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
         return size;
     }
 
+    std::size_t GetSize() const {
+        return size;
+    }
+
+    GLintptr GetOffset() const {
+        return offset;
+    }
+
+    std::size_t GetAlignment() const {
+        return alignment;
+    }
+
     // We do not have to flush this cache as things in it are never modified by us.
     void Flush() override {}
 
-    VAddr addr;
-    std::size_t size;
-    GLintptr offset;
-    std::size_t alignment;
+private:
+    VAddr cpu_addr{};
+    std::size_t size{};
+    GLintptr offset{};
+    std::size_t alignment{};
 };
 
 class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -41,7 +58,7 @@ public:
 
     /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
     /// allocated.
-    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+    GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                           bool cache = true);
 
     /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7161d1dea..5842d6213 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,10 +4,8 @@
 
 #include <glad/glad.h>
 
-#include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -15,12 +13,13 @@
 
 namespace OpenGL {
 
-CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
     buffer.Create();
     // Bind and unbind the buffer so it gets allocated by the driver
     glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
     glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
-    LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
+    LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
 }
 
 void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,10 +34,10 @@ void CachedGlobalRegion::Reload(u32 size_) {
 
     // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
     glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
-    glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
+    glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
 }
 
-GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
+GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
     const auto search{reserve.find(addr)};
     if (search == reserve.end()) {
         return {};
@@ -46,11 +45,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
     return search->second;
 }
 
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
-    GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
+                                                              u8* host_ptr) {
+    GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
     if (!region) {
         // No reserved surface available, create a new one and reserve it
-        region = std::make_shared<CachedGlobalRegion>(addr, size);
+        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+        const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
+        region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
         ReserveGlobalRegion(region);
     }
     region->Reload(size);
@@ -58,7 +60,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
 }
 
 void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
-    reserve.insert_or_assign(region->GetAddr(), std::move(region));
+    reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
 }
 
 GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -69,22 +71,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
     Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
 
     auto& gpu{Core::System::GetInstance().GPU()};
-    const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
-    const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
-        cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
-    ASSERT(cbuf_addr);
-
-    const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
-    const auto size = Memory::Read32(*cbuf_addr + 8);
-    const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
-    ASSERT(actual_addr);
+    auto& memory_manager{gpu.MemoryManager()};
+    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+    const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
+                    global_region.GetCbufOffset()};
+    const auto actual_addr{memory_manager.Read<u64>(addr)};
+    const auto size{memory_manager.Read<u32>(addr + 8)};
 
     // Look up global region in the cache based on address
-    GlobalRegion region = TryGet(*actual_addr);
+    const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
+    GlobalRegion region{TryGet(host_ptr)};
 
     if (!region) {
         // No global region found - create a new one
-        region = GetUncachedGlobalRegion(*actual_addr, size);
+        region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
         Register(region);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index ba2bdc60c..5a21ab66f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
 
 class CachedGlobalRegion final : public RasterizerCacheObject {
 public:
-    explicit CachedGlobalRegion(VAddr addr, u32 size);
+    explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
 
-    /// Gets the address of the shader in guest memory, required for cache management
-    VAddr GetAddr() const override {
-        return addr;
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
-    /// Gets the size of the shader in guest memory, required for cache management
     std::size_t GetSizeInBytes() const override {
         return size;
     }
@@ -53,9 +51,8 @@ public:
     }
 
 private:
-    VAddr addr{};
+    VAddr cpu_addr{};
     u32 size{};
-
     OGLBuffer buffer;
 };
 
@@ -68,11 +65,11 @@ public:
                                  Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
 
 private:
-    GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
-    GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
+    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
+    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
     void ReserveGlobalRegion(GlobalRegion region);
 
-    std::unordered_map<VAddr, GlobalRegion> reserve;
+    std::unordered_map<CacheAddr, GlobalRegion> reserve;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 77d5cedd2..c3e94d917 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -7,7 +7,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "core/core.h"
-#include "core/memory.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_primitive_assembler.h"
 
@@ -40,16 +40,12 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
     return index_offset;
 }
 
-GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
-                                             u32 count) {
+GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
     const std::size_t map_size{CalculateQuadSize(count)};
     auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
 
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT_MSG(cpu_addr, "Invalid GPU address");
-
-    const u8* source{Memory::GetPointer(*cpu_addr)};
+    const u8* source{memory_manager.GetPointer(gpu_addr)};
 
     for (u32 primitive = 0; primitive < count / 4; ++primitive) {
         for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -64,4 +60,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
     return index_offset;
 }
 
-} // namespace OpenGL
-\ No newline at end of file
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index a8cb88eb5..4e87ce4d6 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -4,11 +4,9 @@
 
 #pragma once
 
-#include <vector>
 #include <glad/glad.h>
 
 #include "common/common_types.h"
-#include "video_core/memory_manager.h"
 
 namespace OpenGL {
 
@@ -24,7 +22,7 @@ public:
 
     GLintptr MakeQuadArray(u32 first, u32 count);
 
-    GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+    GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
 
 private:
     OGLBufferCache& buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 976f64c24..7ff1e6737 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -17,7 +17,6 @@
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
-#include "core/frontend/emu_window.h"
 #include "core/hle/kernel/process.h"
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -26,7 +25,6 @@
 #include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
-#include "video_core/video_core.h"
 
 namespace OpenGL {
 
@@ -100,11 +98,9 @@ struct FramebufferCacheKey {
     }
 };
 
-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
-                                   ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this},
-      emu_window{window}, system{system}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE) {
+RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
+    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
+      screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
     // Create sampler objects
     for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
@@ -225,8 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
         if (!vertex_array.IsEnabled())
             continue;
 
-        const Tegra::GPUVAddr start = vertex_array.StartAddress();
-        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+        const GPUVAddr start = vertex_array.StartAddress();
+        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
 
         ASSERT(end > start);
         const u64 size = end - start + 1;
@@ -320,7 +316,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
 
         GLShader::MaxwellUniformData ubo{};
-        ubo.SetFromRegs(gpu.state.shader_stages[stage]);
+        ubo.SetFromRegs(gpu, stage);
         const GLintptr offset = buffer_cache.UploadHostMemory(
             &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
 
@@ -421,8 +417,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
         if (!regs.vertex_array[index].IsEnabled())
             continue;
 
-        const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
-        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+        const GPUVAddr start = regs.vertex_array[index].StartAddress();
+        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
 
         ASSERT(end > start);
         size += end - start + 1;
@@ -449,7 +445,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
     return boost::make_iterator_range(map.equal_range(interval));
 }
 
-void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
+void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
     const u64 page_start{addr >> Memory::PAGE_BITS};
     const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
 
@@ -747,20 +743,26 @@ void RasterizerOpenGL::DrawArrays() {
 
 void RasterizerOpenGL::FlushAll() {}
 
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    if (!addr || !size) {
+        return;
+    }
     res_cache.FlushRegion(addr, size);
 }
 
-void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    if (!addr || !size) {
+        return;
+    }
     res_cache.InvalidateRegion(addr, size);
     shader_cache.InvalidateRegion(addr, size);
     global_cache.InvalidateRegion(addr, size);
     buffer_cache.InvalidateRegion(addr, size);
 }
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
     FlushRegion(addr, size);
     InvalidateRegion(addr, size);
 }
@@ -782,7 +784,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 
-    const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
+    const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
     if (!surface) {
         return {};
     }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ca3de0592..54fbf48aa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -12,15 +12,12 @@
 #include <optional>
 #include <tuple>
 #include <utility>
-#include <vector>
 
 #include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range.hpp>
 #include <glad/glad.h>
 
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -29,10 +26,8 @@
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
 
 namespace Core {
 class System;
@@ -50,16 +45,15 @@ struct FramebufferCacheKey;
 
 class RasterizerOpenGL : public VideoCore::RasterizerInterface {
 public:
-    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
-                              ScreenInfo& info);
+    explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
     ~RasterizerOpenGL() override;
 
     void DrawArrays() override;
     void Clear() override;
     void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                                const Common::Rectangle<u32>& src_rect,
@@ -67,7 +61,7 @@ public:
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
     bool AccelerateDrawBatch(bool is_indexed) override;
-    void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
+    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
     void LoadDiskResources(const std::atomic_bool& stop_loading,
                            const VideoCore::DiskResourceLoadCallback& callback) override;
 
@@ -214,7 +208,6 @@ private:
     ShaderCacheOpenGL shader_cache;
     GlobalRegionCacheOpenGL global_cache;
 
-    Core::Frontend::EmuWindow& emu_window;
     Core::System& system;
 
     ScreenInfo& screen_info;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index bd1409660..5876145ef 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -13,7 +13,6 @@
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
-#include "core/memory.h"
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/morton.h"
@@ -55,12 +54,11 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
     }
 }
 
-void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
+void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
 
-    addr = cpu_addr ? *cpu_addr : 0;
     gpu_addr = gpu_addr_;
+    host_ptr = memory_manager.GetPointer(gpu_addr_);
     size_in_bytes = SizeInBytesRaw();
 
     if (IsPixelFormatASTC(pixel_format)) {
@@ -223,7 +221,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
 }
 
 /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
-    u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+    u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
     u32 block_width, u32 block_height, u32 block_depth,
     Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
     SurfaceParams params{};
@@ -446,7 +444,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
             MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                           params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                           params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
-                          gl_buffer.data() + offset_gl, params.addr + offset);
+                          gl_buffer.data() + offset_gl, params.host_ptr + offset);
             offset += layer_size;
             offset_gl += gl_size;
         }
@@ -455,7 +453,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
         MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                       params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                       params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
-                      gl_buffer.data(), params.addr + offset);
+                      gl_buffer.data(), params.host_ptr + offset);
     }
 }
 
@@ -513,9 +511,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
                               "reinterpretation but the texture is tiled.");
         }
         const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
-
+        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
         glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
-                        Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
+                        memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
     }
 
     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -563,8 +561,14 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
 }
 
 CachedSurface::CachedSurface(const SurfaceParams& params)
-    : params(params), gl_target(SurfaceTargetToGL(params.target)),
-      cached_size_in_bytes(params.size_in_bytes) {
+    : RasterizerCacheObject{params.host_ptr}, params{params},
+      gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {
+
+    const auto optional_cpu_addr{
+        Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
+    ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
+    cpu_addr = *optional_cpu_addr;
+
     texture.Create(gl_target);
 
     // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
@@ -603,19 +607,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
 
     ApplyTextureDefaults(texture.handle, params.max_mip_level);
 
-    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
-
-    // Clamp size to mapped GPU memory region
-    // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
-    // R32F render buffer. We do not yet know if this is a game bug or something else, but this
-    // check is necessary to prevent flushing from overwriting unmapped memory.
-
-    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
-    if (cached_size_in_bytes > max_size) {
-        LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
-        cached_size_in_bytes = max_size;
-    }
+    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -633,10 +625,9 @@ void CachedSurface::LoadGLBuffer() {
         const u32 bpp = params.GetFormatBpp() / 8;
         const u32 copy_size = params.width * bpp;
         if (params.pitch == copy_size) {
-            std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
-                        params.size_in_bytes_gl);
+            std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
         } else {
-            const u8* start = Memory::GetPointer(params.addr);
+            const u8* start{params.host_ptr};
             u8* write_to = gl_buffer[0].data();
             for (u32 h = params.height; h > 0; h--) {
                 std::memcpy(write_to, start, copy_size);
@@ -670,8 +661,8 @@ void CachedSurface::FlushGLBuffer() {
     gl_buffer[0].resize(GetSizeInBytes());
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
+    glPixelStorei(GL_PACK_ALIGNMENT, align);
     glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
     ASSERT(!tuple.compressed);
     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -680,8 +671,6 @@ void CachedSurface::FlushGLBuffer() {
     glPixelStorei(GL_PACK_ROW_LENGTH, 0);
     Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
                                            params.height, params.depth, true, true);
-    const u8* const texture_src_data = Memory::GetPointer(params.addr);
-    ASSERT(texture_src_data);
     if (params.is_tiled) {
         ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                    params.block_width, static_cast<u32>(params.target));
@@ -691,9 +680,9 @@ void CachedSurface::FlushGLBuffer() {
         const u32 bpp = params.GetFormatBpp() / 8;
         const u32 copy_size = params.width * bpp;
         if (params.pitch == copy_size) {
-            std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
+            std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
         } else {
-            u8* start = Memory::GetPointer(params.addr);
+            u8* start{params.host_ptr};
             const u8* read_to = gl_buffer[0].data();
             for (u32 h = params.height; h > 0; h--) {
                 std::memcpy(start, read_to, copy_size);
@@ -718,8 +707,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
 
-    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, align);
     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
 
     const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
@@ -927,12 +916,12 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
-    if (params.addr == 0 || params.height * params.width == 0) {
+    if (!params.IsValid()) {
         return {};
     }
 
     // Look up surface in the cache based on address
-    Surface surface{TryGet(params.addr)};
+    Surface surface{TryGet(params.host_ptr)};
     if (surface) {
         if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
             // Use the cached surface as-is unless it's not synced with memory
@@ -943,7 +932,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
             // If surface parameters changed and we care about keeping the previous data, recreate
             // the surface from the old one
             Surface new_surface{RecreateSurface(surface, params)};
-            UnregisterSurface(surface);
+            Unregister(surface);
             Register(new_surface);
             if (new_surface->IsUploaded()) {
                 RegisterReinterpretSurface(new_surface);
@@ -951,7 +940,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
             return new_surface;
         } else {
             // Delete the old surface before creating a new one to prevent collisions.
-            UnregisterSurface(surface);
+            Unregister(surface);
         }
     }
 
@@ -981,14 +970,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
                                                    const Surface& dst_surface) {
     const auto& init_params{src_surface->GetSurfaceParams()};
     const auto& dst_params{dst_surface->GetSurfaceParams()};
-    VAddr address = init_params.addr;
-    const std::size_t layer_size = dst_params.LayerMemorySize();
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    GPUVAddr address{init_params.gpu_addr};
+    const std::size_t layer_size{dst_params.LayerMemorySize()};
     for (u32 layer = 0; layer < dst_params.depth; layer++) {
         for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
-            const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
-            const Surface& copy = TryGet(sub_address);
-            if (!copy)
+            const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
+            const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
+            if (!copy) {
                 continue;
+            }
             const auto& src_params{copy->GetSurfaceParams()};
             const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
             const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1163,7 +1154,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
     const auto& dst_params{dst_surface->GetSurfaceParams()};
 
     // Flush enough memory for both the source and destination surface
-    FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
+    FlushRegion(ToCacheAddr(src_params.host_ptr),
+                std::max(src_params.MemorySize(), dst_params.MemorySize()));
 
     LoadSurface(dst_surface);
 }
@@ -1215,8 +1207,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
     return new_surface;
 }
 
-Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
-    return TryGet(addr);
+Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
+    return TryGet(host_ptr);
 }
 
 void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1243,9 +1235,9 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
     return {};
 }
 
-static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
-    const std::size_t size = params.LayerMemorySize();
-    VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
+    const std::size_t size{params.LayerMemorySize()};
+    GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
     for (u32 i = 0; i < params.depth; i++) {
         if (start == addr) {
             return {i};
@@ -1267,7 +1259,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
             src_params.height == dst_params.MipHeight(*level) &&
             src_params.block_height >= dst_params.MipBlockHeight(*level)) {
             const std::optional<u32> slot =
-                TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+                TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
             if (slot.has_value()) {
                 glCopyImageSubData(render_surface->Texture().handle,
                                    SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
@@ -1283,8 +1275,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
 }
 
 static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
-    const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
-    const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+    const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
+    const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
     if (bound2 > bound1)
         return true;
     const auto& dst_params = blitted_surface->GetSurfaceParams();
@@ -1302,12 +1294,12 @@ static bool IsReinterpretInvalidSecond(const Surface render_surface,
 bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
                                                       Surface intersect) {
     if (IsReinterpretInvalid(triggering_surface, intersect)) {
-        UnregisterSurface(intersect);
+        Unregister(intersect);
         return false;
     }
     if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
         if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
-            UnregisterSurface(intersect);
+            Unregister(intersect);
             return false;
         }
         FlushObject(intersect);
@@ -1327,7 +1319,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() {
 void RasterizerCacheOpenGL::SignalPostDrawCall() {
     for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
         if (current_color_buffers[i] != nullptr) {
-            Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+            Surface intersect =
+                CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
             if (intersect != nullptr) {
                 PartialReinterpretSurface(current_color_buffers[i], intersect);
                 texception = true;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 9cf6f50be..db280dbb3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -5,13 +5,13 @@
 #pragma once
 
 #include <array>
-#include <map>
 #include <memory>
 #include <string>
-#include <unordered_set>
+#include <tuple>
 #include <vector>
 
 #include "common/alignment.h"
+#include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/math_util.h"
@@ -109,6 +109,11 @@ struct SurfaceParams {
         return size;
     }
 
+    /// Returns true if the parameters constitute a valid rasterizer surface.
+    bool IsValid() const {
+        return gpu_addr && host_ptr && height && width;
+    }
+
     /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
     /// mipmaps.
     std::size_t LayerMemorySize() const {
@@ -201,6 +206,13 @@ struct SurfaceParams {
         return bd;
     }
 
+    /// Returns the largest power of two (in bytes) dividing the row size of a mip level.
+    u32 RowAlign(u32 mip_level) const {
+        const u32 row_bytes = MipWidth(mip_level) * GetBytesPerPixel(pixel_format);
+        // Guard zero: it has no trailing-zero count below 32 and `1U << 32` is undefined.
+        return row_bytes != 0 ? (1U << Common::CountTrailingZeroes32(row_bytes)) : 1U;
+    }
+
     /// Creates SurfaceParams from a texture configuration
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
                                           const GLShader::SamplerEntry& entry);
@@ -210,7 +222,7 @@ struct SurfaceParams {
 
     /// Creates SurfaceParams for a depth buffer configuration
     static SurfaceParams CreateForDepthBuffer(
-        u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+        u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
         u32 block_width, u32 block_height, u32 block_depth,
         Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
 
@@ -232,7 +244,7 @@ struct SurfaceParams {
     }
 
     /// Initializes parameters for caching, should be called after everything has been initialized
-    void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
+    void InitCacheParameters(GPUVAddr gpu_addr);
 
     std::string TargetName() const {
         switch (target) {
@@ -296,8 +308,8 @@ struct SurfaceParams {
     bool is_array;
     bool srgb_conversion;
     // Parameters used for caching
-    VAddr addr;
-    Tegra::GPUVAddr gpu_addr;
+    u8* host_ptr;
+    GPUVAddr gpu_addr;
     std::size_t size_in_bytes;
     std::size_t size_in_bytes_gl;
 
@@ -345,10 +357,10 @@ class RasterizerOpenGL;
 
 class CachedSurface final : public RasterizerCacheObject {
 public:
-    CachedSurface(const SurfaceParams& params);
+    explicit CachedSurface(const SurfaceParams& params);
 
-    VAddr GetAddr() const override {
-        return params.addr;
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
@@ -432,6 +444,7 @@ private:
     std::size_t memory_size;
     bool reinterpreted = false;
     bool must_reload = false;
+    VAddr cpu_addr{};
 };
 
 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -449,7 +462,7 @@ public:
     Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
 
-    /// Tries to find a framebuffer using on the provided CPU address
+    /// Tries to find a framebuffer surface using the provided host pointer
-    Surface TryFindFramebufferSurface(VAddr addr) const;
+    Surface TryFindFramebufferSurface(const u8* host_ptr) const;
 
     /// Copies the contents of one surface to another
     void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
@@ -506,12 +519,12 @@ private:
     std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
     Surface last_depth_buffer;
 
-    using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+    using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
     using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
 
     static auto GetReinterpretInterval(const Surface& object) {
-        return SurfaceInterval::right_open(object->GetAddr() + 1,
-                                           object->GetAddr() + object->GetMemorySize() - 1);
+        return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
+                                           object->GetCacheAddr() + object->GetMemorySize() - 1);
     }
 
     // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
@@ -523,7 +536,7 @@ private:
         reinterpret_surface->MarkReinterpreted();
     }
 
-    Surface CollideOnReinterpretedSurface(VAddr addr) const {
+    Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
         const SurfaceInterval interval{addr};
         for (auto& pair :
              boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
@@ -532,13 +545,17 @@ private:
         return nullptr;
     }
 
+    void Register(const Surface& object) override {
+        RasterizerCache<Surface>::Register(object);
+    }
+
     /// Unregisters an object from the cache
-    void UnregisterSurface(const Surface& object) {
+    void Unregister(const Surface& object) override {
         if (object->IsReinterpreted()) {
             auto interval = GetReinterpretInterval(object);
             reinterpreted_surfaces.erase(interval);
         }
-        Unregister(object);
+        RasterizerCache<Surface>::Unregister(object);
     }
 };
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4883e4f62..ab381932c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,13 +6,11 @@
 #include "common/assert.h"
 #include "common/hash.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/utils.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -32,19 +30,20 @@ struct UnspecializedShader {
 namespace {
 
 /// Gets the address for the specified shader stage program
-VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
-    const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
-                                                            shader_config.offset);
-    ASSERT_MSG(address, "Invalid GPU address");
-    return *address;
+GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+    const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
 }
 
 /// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(VAddr addr) {
+ProgramCode GetShaderCode(const u8* host_ptr) {
     ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
-    Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+        std::fill(program_code.begin(), program_code.end(), 0);
+        return program_code;
+    });
+    std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
     return program_code;
 }
 
@@ -214,12 +213,13 @@ std::set<GLenum> GetSupportedFormats() {
 
 } // namespace
 
-CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
-                           ShaderDiskCacheOpenGL& disk_cache,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                            const PrecompiledPrograms& precompiled_programs,
-                           ProgramCode&& program_code, ProgramCode&& program_code_b)
-    : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
-      disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+                           ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
+      unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
+      precompiled_programs{precompiled_programs} {
 
     const std::size_t code_size = CalculateProgramSize(program_code);
     const std::size_t code_size_b =
@@ -243,12 +243,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
     disk_cache.SaveRaw(raw);
 }
 
-CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
-                           ShaderDiskCacheOpenGL& disk_cache,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                            const PrecompiledPrograms& precompiled_programs,
-                           GLShader::ProgramResult result)
-    : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
-      disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+                           GLShader::ProgramResult result, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
+      unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
+      precompiled_programs{precompiled_programs} {
 
     code = std::move(result.first);
     entries = result.second;
@@ -271,7 +272,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
                 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
             }
 
-            LabelGLObject(GL_PROGRAM, program->handle, addr);
+            LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
         }
 
         handle = program->handle;
@@ -323,7 +324,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
         disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
     }
 
-    LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
+    LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
 
     return target_program->handle;
 };
@@ -486,29 +487,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
         return last_shaders[static_cast<u32>(program)];
     }
 
-    const VAddr program_addr{GetShaderAddress(program)};
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    const GPUVAddr program_addr{GetShaderAddress(program)};
 
     // Look up shader in the cache based on address
-    Shader shader{TryGet(program_addr)};
+    const auto& host_ptr{memory_manager.GetPointer(program_addr)};
+    Shader shader{TryGet(host_ptr)};
 
     if (!shader) {
         // No shader found - create a new one
-        ProgramCode program_code = GetShaderCode(program_addr);
+        ProgramCode program_code{GetShaderCode(host_ptr)};
         ProgramCode program_code_b;
         if (program == Maxwell::ShaderProgram::VertexA) {
-            program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
+            program_code_b = GetShaderCode(
+                memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
         }
         const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
-
+        // The translation may come back empty; never dereference an empty optional.
+        const auto optional_cpu_addr{memory_manager.GpuToCpuAddress(program_addr)};
+        ASSERT_MSG(optional_cpu_addr, "Invalid GPU address");
+        const VAddr cpu_addr{optional_cpu_addr.value_or(0)};
         const auto found = precompiled_shaders.find(unique_identifier);
         if (found != precompiled_shaders.end()) {
             shader =
-                std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
-                                               precompiled_programs, found->second);
+                std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
+                                               precompiled_programs, found->second, host_ptr);
         } else {
             shader = std::make_shared<CachedShader>(
-                program_addr, unique_identifier, program, disk_cache, precompiled_programs,
-                std::move(program_code), std::move(program_code_b));
+                cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+                std::move(program_code), std::move(program_code_b), host_ptr);
         }
         Register(shader);
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 97eed192f..0cf8e0b3d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,21 +5,20 @@
 #pragma once
 
 #include <array>
+#include <atomic>
 #include <memory>
 #include <set>
 #include <tuple>
 #include <unordered_map>
+#include <vector>
 
 #include <glad/glad.h>
 
-#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/rasterizer_cache.h"
-#include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
 
 namespace Core {
 class System;
@@ -39,18 +38,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
 
 class CachedShader final : public RasterizerCacheObject {
 public:
-    explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
-                          ShaderDiskCacheOpenGL& disk_cache,
+    explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                          Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                           const PrecompiledPrograms& precompiled_programs,
-                          ProgramCode&& program_code, ProgramCode&& program_code_b);
+                          ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
 
-    explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
-                          ShaderDiskCacheOpenGL& disk_cache,
+    explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                          Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                           const PrecompiledPrograms& precompiled_programs,
-                          GLShader::ProgramResult result);
+                          GLShader::ProgramResult result, u8* host_ptr);
 
-    VAddr GetAddr() const override {
-        return addr;
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
@@ -91,7 +90,8 @@ private:
 
     ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
 
-    VAddr addr{};
+    u8* host_ptr{};
+    VAddr cpu_addr{};
     u64 unique_identifier{};
     Maxwell::ShaderProgram program_type{};
     ShaderDiskCacheOpenGL& disk_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 11d1169f0..3ea08ef7b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -21,6 +21,8 @@
 
 namespace OpenGL::GLShader {
 
+namespace {
+
 using Tegra::Shader::Attribute;
 using Tegra::Shader::AttributeUse;
 using Tegra::Shader::Header;
@@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
 using Operation = const OperationNode&;
 
+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; // Operand type tags
+
+struct TextureAoffi {}; // Empty tag: marks where the AOFFI offset goes among a texture call's extras
+using TextureArgument = std::pair<Type, Node>; // One extra texture argument: its type and IR node
+using TextureIR = std::variant<TextureAoffi, TextureArgument>; // One entry of GenerateTexture's extras
+
 enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
     static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
 constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
     static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
 
-enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
-
 class ShaderWriter {
 public:
     void AddExpression(std::string_view text) {
@@ -69,10 +75,10 @@ public:
         shader_source += '\n';
     }
 
-    std::string GenerateTemporal() {
-        std::string temporal = "tmp";
-        temporal += std::to_string(temporal_index++);
-        return temporal;
+    std::string GenerateTemporary() { // Returns the name "tmp<N>" and advances the counter
+        std::string temporary = "tmp";
+        temporary += std::to_string(temporary_index++); // Post-increment: the next call gets N + 1
+        return temporary;
     }
 
     std::string GetResult() {
@@ -87,11 +93,11 @@ private:
     }
 
     std::string shader_source;
-    u32 temporal_index = 1;
+    u32 temporary_index = 1;
 };
 
 /// Generates code to use for a swizzle operation.
-static std::string GetSwizzle(u32 elem) {
+std::string GetSwizzle(u32 elem) {
     ASSERT(elem <= 3);
     std::string swizzle = ".";
     swizzle += "xyzw"[elem];
@@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
 }
 
 /// Translate topology
-static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
+std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
     switch (topology) {
     case Tegra::Shader::OutputTopology::PointList:
         return "points";
@@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
 }
 
 /// Returns true if an object has to be treated as precise
-static bool IsPrecise(Operation operand) {
+bool IsPrecise(Operation operand) {
     const auto& meta = operand.GetMeta();
 
     if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
@@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
     return false;
 }
 
-static bool IsPrecise(Node node) {
+bool IsPrecise(Node node) {
     if (const auto operation = std::get_if<OperationNode>(node)) {
         return IsPrecise(*operation);
     }
@@ -426,9 +432,14 @@ private:
     std::string Visit(Node node) {
         if (const auto operation = std::get_if<OperationNode>(node)) {
             const auto operation_index = static_cast<std::size_t>(operation->GetCode());
+            if (operation_index >= operation_decompilers.size()) {
+                UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
+                return {};
+            }
             const auto decompiler = operation_decompilers[operation_index];
             if (decompiler == nullptr) {
-                UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
+                UNREACHABLE_MSG("Undefined operation: {}", operation_index);
+                return {};
             }
             return (this->*decompiler)(*operation);
 
@@ -540,7 +551,7 @@ private:
 
             } else if (std::holds_alternative<OperationNode>(*offset)) {
                 // Indirect access
-                const std::string final_offset = code.GenerateTemporal();
+                const std::string final_offset = code.GenerateTemporary();
                 code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " +
                              std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';');
                 return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
@@ -587,9 +598,9 @@ private:
         // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
         const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
 
-        const std::string temporal = code.GenerateTemporal();
-        code.AddLine(precise + "float " + temporal + " = " + value + ';');
-        return temporal;
+        const std::string temporary = code.GenerateTemporary();
+        code.AddLine(precise + "float " + temporary + " = " + value + ';');
+        return temporary;
     }
 
     std::string VisitOperand(Operation operation, std::size_t operand_index) {
@@ -601,9 +612,9 @@ private:
             return Visit(operand);
         }
 
-        const std::string temporal = code.GenerateTemporal();
-        code.AddLine("float " + temporal + " = " + Visit(operand) + ';');
-        return temporal;
+        const std::string temporary = code.GenerateTemporary();
+        code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
+        return temporary;
     }
 
     std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
@@ -718,8 +729,8 @@ private:
                                                          result_type));
     }
 
-    std::string GenerateTexture(Operation operation, const std::string& func,
-                                const std::vector<std::pair<Type, Node>>& extras) {
+    std::string GenerateTexture(Operation operation, const std::string& function_suffix,
+                                const std::vector<TextureIR>& extras) {
         constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
 
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -729,11 +740,11 @@ private:
         const bool has_array = meta->sampler.IsArray();
         const bool has_shadow = meta->sampler.IsShadow();
 
-        std::string expr = func;
-        expr += '(';
-        expr += GetSampler(meta->sampler);
-        expr += ", ";
-
+        std::string expr = "texture" + function_suffix;
+        if (!meta->aoffi.empty()) {
+            expr += "Offset";
+        }
+        expr += '(' + GetSampler(meta->sampler) + ", ";
         expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
         expr += '(';
         for (std::size_t i = 0; i < count; ++i) {
@@ -751,36 +762,74 @@ private:
         }
         expr += ')';
 
-        for (const auto& extra_pair : extras) {
-            const auto [type, operand] = extra_pair;
-            if (operand == nullptr) {
-                continue;
+        for (const auto& variant : extras) {
+            if (const auto argument = std::get_if<TextureArgument>(&variant)) {
+                expr += GenerateTextureArgument(*argument);
+            } else if (std::get_if<TextureAoffi>(&variant)) {
+                expr += GenerateTextureAoffi(meta->aoffi);
+            } else {
+                UNREACHABLE();
             }
-            expr += ", ";
+        }
 
-            switch (type) {
-            case Type::Int:
-                if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
-                    // Inline the string as an immediate integer in GLSL (some extra arguments are
-                    // required to be constant)
-                    expr += std::to_string(static_cast<s32>(immediate->GetValue()));
-                } else {
-                    expr += "ftoi(" + Visit(operand) + ')';
-                }
-                break;
-            case Type::Float:
-                expr += Visit(operand);
-                break;
-            default: {
-                const auto type_int = static_cast<u32>(type);
-                UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
-                expr += '0';
-                break;
+        return expr + ')';
+    }
+
+    std::string GenerateTextureArgument(TextureArgument argument) { // Returns ", <arg>" or ""
+        const auto [type, operand] = argument;
+        if (operand == nullptr) {
+            return {}; // Null node: the argument is omitted, including its leading comma
+        }
+
+        std::string expr = ", ";
+        switch (type) {
+        case Type::Int:
+            if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+                // Inline the string as an immediate integer in GLSL (some extra arguments are
+                // required to be constant)
+                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+            } else {
+                expr += "ftoi(" + Visit(operand) + ')'; // Non-immediate: visit and wrap in ftoi()
             }
+            break;
+        case Type::Float:
+            expr += Visit(operand); // Float operands are emitted as visited, with no cast
+            break;
+        default: {
+            const auto type_int = static_cast<u32>(type);
+            UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
+            expr += '0'; // Other types: report and emit a literal 0 in place of the argument
+            break;
+        }
+        }
+        return expr;
+    }
+
+    std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { // Returns ", <ctor>(...)" or ""
+        if (aoffi.empty()) {
+            return {}; // No offset nodes: nothing is appended to the call
+        }
+        constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
+        std::string expr = ", ";
+        expr += coord_constructors.at(aoffi.size() - 1); // Bounds-checked: more than 3 nodes throws
+        expr += '(';
+
+        for (std::size_t index = 0; index < aoffi.size(); ++index) {
+            const auto operand{aoffi.at(index)};
+            if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+                // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
+                // to be constant by the standard).
+                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+            } else {
+                expr += "ftoi(" + Visit(operand) + ')'; // Non-immediate: visit and wrap in ftoi()
+            }
+            if (index + 1 < aoffi.size()) { // Separator between components, none after the last
+                expr += ", ";
             }
         }
+        expr += ')';
 
-        return expr + ')';
+        return expr;
     }
 
     std::string Assign(Operation operation) {
@@ -1159,7 +1208,8 @@ private:
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
         ASSERT(meta);
 
-        std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
+        std::string expr = GenerateTexture(
+            operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
         if (meta->sampler.IsShadow()) {
             expr = "vec4(" + expr + ')';
         }
@@ -1170,7 +1220,8 @@ private:
         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
         ASSERT(meta);
 
-        std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
+        std::string expr = GenerateTexture(
+            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
         if (meta->sampler.IsShadow()) {
             expr = "vec4(" + expr + ')';
         }
@@ -1182,7 +1233,8 @@ private:
         ASSERT(meta);
 
         const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
-        return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
+        return GenerateTexture(operation, "Gather",
+                               {TextureArgument{type, meta->component}, TextureAoffi{}}) +
                GetSwizzle(meta->element);
     }
 
@@ -1196,11 +1248,12 @@ private:
         switch (meta->element) {
         case 0:
         case 1:
-            return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element);
+            return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
+                   GetSwizzle(meta->element) + "))";
         case 2:
             return "0";
         case 3:
-            return "textureQueryLevels(" + sampler + ')';
+            return "itof(textureQueryLevels(" + sampler + "))";
         }
         UNREACHABLE();
         return "0";
@@ -1211,8 +1264,8 @@ private:
         ASSERT(meta);
 
         if (meta->element < 2) {
-            return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
-                   " * vec2(256))" + GetSwizzle(meta->element) + "))";
+            return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
+                   GetSwizzle(meta->element) + "))";
         }
         return "0";
     }
@@ -1565,6 +1618,8 @@ private:
     ShaderWriter code;
 };
 
+} // Anonymous namespace
+
 std::string GetCommonDeclarations() {
     const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
     const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 72aca4938..4e04ab2f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,7 +5,6 @@
 #pragma once
 
 #include <array>
-#include <set>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 82fc4d44b..8a43eb157 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -4,7 +4,6 @@
 
 #include <cstring>
 #include <fmt/format.h>
-#include <lz4.h>
 
 #include "common/assert.h"
 #include "common/common_paths.h"
@@ -12,6 +11,7 @@
 #include "common/file_util.h"
 #include "common/logging/log.h"
 #include "common/scm_rev.h"
+#include "common/zstd_compression.h"
 
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
@@ -49,39 +49,6 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
     return hash;
 }
 
-template <typename T>
-std::vector<u8> CompressData(const T* source, std::size_t source_size) {
-    if (source_size > LZ4_MAX_INPUT_SIZE) {
-        // Source size exceeds LZ4 maximum input size
-        return {};
-    }
-    const auto source_size_int = static_cast<int>(source_size);
-    const int max_compressed_size = LZ4_compressBound(source_size_int);
-    std::vector<u8> compressed(max_compressed_size);
-    const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
-                                                     reinterpret_cast<char*>(compressed.data()),
-                                                     source_size_int, max_compressed_size);
-    if (compressed_size <= 0) {
-        // Compression failed
-        return {};
-    }
-    compressed.resize(compressed_size);
-    return compressed;
-}
-
-std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
-    std::vector<u8> uncompressed(uncompressed_size);
-    const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
-                                               reinterpret_cast<char*>(uncompressed.data()),
-                                               static_cast<int>(compressed.size()),
-                                               static_cast<int>(uncompressed.size()));
-    if (static_cast<int>(uncompressed_size) != size_check) {
-        // Decompression failed
-        return {};
-    }
-    return uncompressed;
-}
-
 } // namespace
 
 ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
@@ -292,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
                 return {};
             }
 
-            dump.binary = DecompressData(compressed_binary, binary_length);
+            dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
             if (dump.binary.empty()) {
                 return {};
             }
@@ -321,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
         return {};
     }
 
-    const std::vector<u8> code = DecompressData(compressed_code, code_size);
+    const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
     if (code.empty()) {
         return {};
     }
@@ -507,7 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
     if (!IsUsable())
         return;
 
-    const std::vector<u8> compressed_code{CompressData(code.data(), code.size())};
+    const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
+        reinterpret_cast<const u8*>(code.data()), code.size())};
     if (compressed_code.empty()) {
         LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
                   unique_identifier);
@@ -537,7 +505,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
     std::vector<u8> binary(binary_length);
     glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
 
-    const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
+    const std::vector<u8> compressed_binary =
+        Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
+
     if (compressed_binary.empty()) {
         LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
                   usage.unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 7d96649af..8763d9c71 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -3,7 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <fmt/format.h>
-#include "common/assert.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index fba8e681b..fad346b48 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -4,12 +4,9 @@
 
 #pragma once
 
-#include <array>
-#include <string>
 #include <vector>
 
 #include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/shader/shader_ir.h"
 
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 6a30c28d2..eaf3e03a0 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,15 +2,15 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/core.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 
 namespace OpenGL::GLShader {
 
-void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
-    const auto& state = gpu.state;
+using Tegra::Engines::Maxwell3D;
+
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+    const auto& regs = maxwell.regs;
+    const auto& state = maxwell.state;
 
     // TODO(bunnei): Support more than one viewport
     viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
@@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
 
     u32 func = static_cast<u32>(regs.alpha_test_func);
     // Normalize the gl variants of opCompare to be the same as the normal variants
-    u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never);
+    const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
     if (func >= op_gl_variant_base) {
         func = func - op_gl_variant_base + 1U;
     }
@@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
 
     // Assign in which stage the position has to be flipped
     // (the last stage before the fragment shader).
-    if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
-        flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+    constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+    if (maxwell.regs.shader_config[geometry_index].enable) {
+        flip_stage = geometry_index;
     } else {
         flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 4970aafed..37dcfefdb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,14 +12,13 @@
 
 namespace OpenGL::GLShader {
 
-using Tegra::Engines::Maxwell3D;
-
 /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-//       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-//       Not following that rule will cause problems on some AMD drivers.
+/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+///       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+///       Not following that rule will cause problems on some AMD drivers.
 struct MaxwellUniformData {
-    void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
+    void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
+
     alignas(16) GLvec4 viewport_flip;
     struct alignas(16) {
         GLuint instance_id;
@@ -63,7 +62,6 @@ public:
         UpdatePipeline();
         state.draw.shader_program = 0;
         state.draw.program_pipeline = pipeline.handle;
-        state.geometry_shaders.enabled = (gs != 0);
     }
 
 private:
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 9419326a3..52d569a1b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -10,16 +10,62 @@
 
 namespace OpenGL {
 
-OpenGLState OpenGLState::cur_state;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
+OpenGLState OpenGLState::cur_state;
 bool OpenGLState::s_rgb_used;
 
+namespace {
+
+template <typename T>
+bool UpdateValue(T& current_value, const T new_value) { // Stores new_value; true if it differed
+    const bool changed = current_value != new_value; // Compare before overwriting
+    current_value = new_value;
+    return changed;
+}
+
+template <typename T1, typename T2>
+bool UpdateTie(T1 current_value, const T2 new_value) { // Like UpdateValue, but T1 is taken by value
+    const bool changed = current_value != new_value;
+    current_value = new_value; // NOTE(review): presumably T1 is a std::tie tuple of refs - confirm
+    return changed;
+}
+
+void Enable(GLenum cap, bool enable) { // Calls glEnable(cap) or glDisable(cap) based on enable
+    if (enable) {
+        glEnable(cap);
+    } else {
+        glDisable(cap);
+    }
+}
+
+void Enable(GLenum cap, GLuint index, bool enable) { // Indexed form: glEnablei / glDisablei
+    if (enable) {
+        glEnablei(cap, index);
+    } else {
+        glDisablei(cap, index);
+    }
+}
+
+void Enable(GLenum cap, bool& current_value, bool new_value) { // Tracked form of Enable(cap, bool)
+    if (UpdateValue(current_value, new_value)) // Always stores new_value; true only on a change
+        Enable(cap, new_value);
+}
+
+void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { // Tracked, indexed
+    if (UpdateValue(current_value, new_value)) // Always stores new_value; true only on a change
+        Enable(cap, index, new_value);
+}
+
+} // namespace
+
 OpenGLState::OpenGLState() {
     // These all match default OpenGL values
-    geometry_shaders.enabled = false;
     framebuffer_srgb.enabled = false;
+
     multisample_control.alpha_to_coverage = false;
     multisample_control.alpha_to_one = false;
+
     cull.enabled = false;
     cull.mode = GL_BACK;
     cull.front_face = GL_CCW;
@@ -30,14 +76,15 @@ OpenGLState::OpenGLState() {
 
     primitive_restart.enabled = false;
     primitive_restart.index = 0;
+
     for (auto& item : color_mask) {
         item.red_enabled = GL_TRUE;
         item.green_enabled = GL_TRUE;
         item.blue_enabled = GL_TRUE;
         item.alpha_enabled = GL_TRUE;
     }
-    stencil.test_enabled = false;
-    auto reset_stencil = [](auto& config) {
+
+    const auto ResetStencil = [](auto& config) {
         config.test_func = GL_ALWAYS;
         config.test_ref = 0;
         config.test_mask = 0xFFFFFFFF;
@@ -46,8 +93,10 @@ OpenGLState::OpenGLState() {
         config.action_depth_pass = GL_KEEP;
         config.action_stencil_fail = GL_KEEP;
     };
-    reset_stencil(stencil.front);
-    reset_stencil(stencil.back);
+    stencil.test_enabled = false;
+    ResetStencil(stencil.front);
+    ResetStencil(stencil.back);
+
     for (auto& item : viewports) {
         item.x = 0;
         item.y = 0;
@@ -61,6 +110,7 @@ OpenGLState::OpenGLState() {
         item.scissor.width = 0;
         item.scissor.height = 0;
     }
+
     for (auto& item : blend) {
         item.enabled = true;
         item.rgb_equation = GL_FUNC_ADD;
@@ -70,11 +120,14 @@ OpenGLState::OpenGLState() {
         item.src_a_func = GL_ONE;
         item.dst_a_func = GL_ZERO;
     }
+
     independant_blend.enabled = false;
+
     blend_color.red = 0.0f;
     blend_color.green = 0.0f;
     blend_color.blue = 0.0f;
     blend_color.alpha = 0.0f;
+
     logic_op.enabled = false;
     logic_op.operation = GL_COPY;
 
@@ -91,9 +144,12 @@ OpenGLState::OpenGLState() {
     clip_distance = {};
 
     point.size = 1;
+
     fragment_color_clamp.enabled = false;
+
     depth_clamp.far_plane = false;
     depth_clamp.near_plane = false;
+
     polygon_offset.fill_enable = false;
     polygon_offset.line_enable = false;
     polygon_offset.point_enable = false;
@@ -103,260 +159,255 @@ OpenGLState::OpenGLState() {
 }
 
 void OpenGLState::ApplyDefaultState() {
+    glEnable(GL_BLEND);
     glDisable(GL_FRAMEBUFFER_SRGB);
     glDisable(GL_CULL_FACE);
     glDisable(GL_DEPTH_TEST);
     glDisable(GL_PRIMITIVE_RESTART);
     glDisable(GL_STENCIL_TEST);
-    glEnable(GL_BLEND);
     glDisable(GL_COLOR_LOGIC_OP);
     glDisable(GL_SCISSOR_TEST);
 }
 
+void OpenGLState::ApplyFramebufferState() const {
+    if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
+    }
+    if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
+    }
+}
+
+void OpenGLState::ApplyVertexArrayState() const {
+    if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
+        glBindVertexArray(draw.vertex_array);
+    }
+}
+
+void OpenGLState::ApplyShaderProgram() const {
+    if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
+        glUseProgram(draw.shader_program);
+    }
+}
+
+void OpenGLState::ApplyProgramPipeline() const {
+    if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
+        glBindProgramPipeline(draw.program_pipeline);
+    }
+}
+
+void OpenGLState::ApplyClipDistances() const {
+    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
+        Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
+               clip_distance[i]);
+    }
+}
+
+void OpenGLState::ApplyPointSize() const {
+    if (UpdateValue(cur_state.point.size, point.size)) {
+        glPointSize(point.size);
+    }
+}
+
+void OpenGLState::ApplyFragmentColorClamp() const {
+    if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
+        glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
+                     fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
+    }
+}
+
+void OpenGLState::ApplyMultisample() const {
+    Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
+           multisample_control.alpha_to_coverage);
+    Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
+           multisample_control.alpha_to_one);
+}
+
+void OpenGLState::ApplyDepthClamp() const {
+    if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
+        depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
+        return;
+    }
+    cur_state.depth_clamp = depth_clamp;
+
+    UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
+                         "Unimplemented Depth Clamp Separation!");
+
+    Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
+}
+
 void OpenGLState::ApplySRgb() const {
-    if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
-        if (framebuffer_srgb.enabled) {
-            // Track if sRGB is used
-            s_rgb_used = true;
-            glEnable(GL_FRAMEBUFFER_SRGB);
-        } else {
-            glDisable(GL_FRAMEBUFFER_SRGB);
-        }
+    if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
+        return;
+    cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
+    if (framebuffer_srgb.enabled) {
+        // Track if sRGB is used
+        s_rgb_used = true;
+        glEnable(GL_FRAMEBUFFER_SRGB);
+    } else {
+        glDisable(GL_FRAMEBUFFER_SRGB);
     }
 }
 
 void OpenGLState::ApplyCulling() const {
-    if (cull.enabled != cur_state.cull.enabled) {
-        if (cull.enabled) {
-            glEnable(GL_CULL_FACE);
-        } else {
-            glDisable(GL_CULL_FACE);
-        }
-    }
+    Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
 
-    if (cull.mode != cur_state.cull.mode) {
+    if (UpdateValue(cur_state.cull.mode, cull.mode)) {
         glCullFace(cull.mode);
     }
 
-    if (cull.front_face != cur_state.cull.front_face) {
+    if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
         glFrontFace(cull.front_face);
     }
 }
 
 void OpenGLState::ApplyColorMask() const {
-    if (independant_blend.enabled) {
-        for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
-            const auto& updated = color_mask[i];
-            const auto& current = cur_state.color_mask[i];
-            if (updated.red_enabled != current.red_enabled ||
-                updated.green_enabled != current.green_enabled ||
-                updated.blue_enabled != current.blue_enabled ||
-                updated.alpha_enabled != current.alpha_enabled) {
-                glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
-                             updated.blue_enabled, updated.alpha_enabled);
-            }
-        }
-    } else {
-        const auto& updated = color_mask[0];
-        const auto& current = cur_state.color_mask[0];
+    for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+        const auto& updated = color_mask[i];
+        auto& current = cur_state.color_mask[i];
         if (updated.red_enabled != current.red_enabled ||
             updated.green_enabled != current.green_enabled ||
             updated.blue_enabled != current.blue_enabled ||
             updated.alpha_enabled != current.alpha_enabled) {
-            glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
-                        updated.alpha_enabled);
+            current = updated;
+            glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
+                         updated.blue_enabled, updated.alpha_enabled);
         }
     }
 }
 
 void OpenGLState::ApplyDepth() const {
-    if (depth.test_enabled != cur_state.depth.test_enabled) {
-        if (depth.test_enabled) {
-            glEnable(GL_DEPTH_TEST);
-        } else {
-            glDisable(GL_DEPTH_TEST);
-        }
-    }
+    Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
 
-    if (depth.test_func != cur_state.depth.test_func) {
+    if (cur_state.depth.test_func != depth.test_func) {
+        cur_state.depth.test_func = depth.test_func;
         glDepthFunc(depth.test_func);
     }
 
-    if (depth.write_mask != cur_state.depth.write_mask) {
+    if (cur_state.depth.write_mask != depth.write_mask) {
+        cur_state.depth.write_mask = depth.write_mask;
         glDepthMask(depth.write_mask);
     }
 }
 
 void OpenGLState::ApplyPrimitiveRestart() const {
-    if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
-        if (primitive_restart.enabled) {
-            glEnable(GL_PRIMITIVE_RESTART);
-        } else {
-            glDisable(GL_PRIMITIVE_RESTART);
-        }
-    }
+    Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
 
-    if (primitive_restart.index != cur_state.primitive_restart.index) {
+    if (cur_state.primitive_restart.index != primitive_restart.index) {
+        cur_state.primitive_restart.index = primitive_restart.index;
         glPrimitiveRestartIndex(primitive_restart.index);
     }
 }
 
 void OpenGLState::ApplyStencilTest() const {
-    if (stencil.test_enabled != cur_state.stencil.test_enabled) {
-        if (stencil.test_enabled) {
-            glEnable(GL_STENCIL_TEST);
-        } else {
-            glDisable(GL_STENCIL_TEST);
-        }
-    }
-
-    const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
-        if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
-            config.test_mask != prev_config.test_mask) {
+    Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
+
+    const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
+        if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
+            current.test_mask != config.test_mask) {
+            current.test_func = config.test_func;
+            current.test_ref = config.test_ref;
+            current.test_mask = config.test_mask;
             glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
         }
-        if (config.action_depth_fail != prev_config.action_depth_fail ||
-            config.action_depth_pass != prev_config.action_depth_pass ||
-            config.action_stencil_fail != prev_config.action_stencil_fail) {
+        if (current.action_depth_fail != config.action_depth_fail ||
+            current.action_depth_pass != config.action_depth_pass ||
+            current.action_stencil_fail != config.action_stencil_fail) {
+            current.action_depth_fail = config.action_depth_fail;
+            current.action_depth_pass = config.action_depth_pass;
+            current.action_stencil_fail = config.action_stencil_fail;
             glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
                                 config.action_depth_pass);
         }
-        if (config.write_mask != prev_config.write_mask) {
+        if (current.write_mask != config.write_mask) {
+            current.write_mask = config.write_mask;
             glStencilMaskSeparate(face, config.write_mask);
         }
     };
     ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
     ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
 }
-// Viewport does not affects glClearBuffer so emulate viewport using scissor test
-void OpenGLState::EmulateViewportWithScissor() {
-    auto& current = viewports[0];
-    if (current.scissor.enabled) {
-        const GLint left = std::max(current.x, current.scissor.x);
-        const GLint right =
-            std::max(current.x + current.width, current.scissor.x + current.scissor.width);
-        const GLint bottom = std::max(current.y, current.scissor.y);
-        const GLint top =
-            std::max(current.y + current.height, current.scissor.y + current.scissor.height);
-        current.scissor.x = std::max(left, 0);
-        current.scissor.y = std::max(bottom, 0);
-        current.scissor.width = std::max(right - left, 0);
-        current.scissor.height = std::max(top - bottom, 0);
-    } else {
-        current.scissor.enabled = true;
-        current.scissor.x = current.x;
-        current.scissor.y = current.y;
-        current.scissor.width = current.width;
-        current.scissor.height = current.height;
-    }
-}
 
 void OpenGLState::ApplyViewport() const {
-    if (geometry_shaders.enabled) {
-        for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports);
-             i++) {
-            const auto& current = cur_state.viewports[i];
-            const auto& updated = viewports[i];
-            if (updated.x != current.x || updated.y != current.y ||
-                updated.width != current.width || updated.height != current.height) {
-                glViewportIndexedf(
-                    i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
-                    static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height));
-            }
-            if (updated.depth_range_near != current.depth_range_near ||
-                updated.depth_range_far != current.depth_range_far) {
-                glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
-            }
-
-            if (updated.scissor.enabled != current.scissor.enabled) {
-                if (updated.scissor.enabled) {
-                    glEnablei(GL_SCISSOR_TEST, i);
-                } else {
-                    glDisablei(GL_SCISSOR_TEST, i);
-                }
-            }
-
-            if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
-                updated.scissor.width != current.scissor.width ||
-                updated.scissor.height != current.scissor.height) {
-                glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
-                                 updated.scissor.height);
-            }
-        }
-    } else {
-        const auto& current = cur_state.viewports[0];
-        const auto& updated = viewports[0];
-        if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
-            updated.height != current.height) {
-            glViewport(updated.x, updated.y, updated.width, updated.height);
-        }
-
-        if (updated.depth_range_near != current.depth_range_near ||
-            updated.depth_range_far != current.depth_range_far) {
-            glDepthRange(updated.depth_range_near, updated.depth_range_far);
+    for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
+        const auto& updated = viewports[i];
+        auto& current = cur_state.viewports[i];
+
+        if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
+            current.height != updated.height) {
+            current.x = updated.x;
+            current.y = updated.y;
+            current.width = updated.width;
+            current.height = updated.height;
+            glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
+                               static_cast<GLfloat>(updated.width),
+                               static_cast<GLfloat>(updated.height));
         }
-
-        if (updated.scissor.enabled != current.scissor.enabled) {
-            if (updated.scissor.enabled) {
-                glEnable(GL_SCISSOR_TEST);
-            } else {
-                glDisable(GL_SCISSOR_TEST);
-            }
+        if (current.depth_range_near != updated.depth_range_near ||
+            current.depth_range_far != updated.depth_range_far) {
+            current.depth_range_near = updated.depth_range_near;
+            current.depth_range_far = updated.depth_range_far;
+            glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
         }
 
-        if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
-            updated.scissor.width != current.scissor.width ||
-            updated.scissor.height != current.scissor.height) {
-            glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
-                      updated.scissor.height);
+        Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
+
+        if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
+            current.scissor.width != updated.scissor.width ||
+            current.scissor.height != updated.scissor.height) {
+            current.scissor.x = updated.scissor.x;
+            current.scissor.y = updated.scissor.y;
+            current.scissor.width = updated.scissor.width;
+            current.scissor.height = updated.scissor.height;
+            glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
+                             updated.scissor.height);
         }
     }
 }
 
 void OpenGLState::ApplyGlobalBlending() const {
-    const Blend& current = cur_state.blend[0];
     const Blend& updated = blend[0];
-    if (updated.enabled != current.enabled) {
-        if (updated.enabled) {
-            glEnable(GL_BLEND);
-        } else {
-            glDisable(GL_BLEND);
-        }
-    }
-    if (!updated.enabled) {
-        return;
-    }
-    if (updated.src_rgb_func != current.src_rgb_func ||
-        updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
-        updated.dst_a_func != current.dst_a_func) {
+    Blend& current = cur_state.blend[0];
+
+    Enable(GL_BLEND, current.enabled, updated.enabled);
+
+    if (current.src_rgb_func != updated.src_rgb_func ||
+        current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
+        current.dst_a_func != updated.dst_a_func) {
+        current.src_rgb_func = updated.src_rgb_func;
+        current.dst_rgb_func = updated.dst_rgb_func;
+        current.src_a_func = updated.src_a_func;
+        current.dst_a_func = updated.dst_a_func;
         glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
                             updated.dst_a_func);
     }
 
-    if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
+    if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
+        current.rgb_equation = updated.rgb_equation;
+        current.a_equation = updated.a_equation;
         glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
     }
 }
 
 void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
     const Blend& updated = blend[target];
-    const Blend& current = cur_state.blend[target];
-    if (updated.enabled != current.enabled || force) {
-        if (updated.enabled) {
-            glEnablei(GL_BLEND, static_cast<GLuint>(target));
-        } else {
-            glDisablei(GL_BLEND, static_cast<GLuint>(target));
-        }
+    Blend& current = cur_state.blend[target];
+
+    if (current.enabled != updated.enabled || force) {
+        current.enabled = updated.enabled;
+        Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
     }
 
-    if (updated.src_rgb_func != current.src_rgb_func ||
-        updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
-        updated.dst_a_func != current.dst_a_func) {
+    if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
+                           current.dst_a_func),
+                  std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+                           updated.dst_a_func))) {
         glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
                              updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
     }
 
-    if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
+    if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
+                  std::tie(updated.rgb_equation, updated.a_equation))) {
         glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
                                  updated.a_equation);
     }
@@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
 
 void OpenGLState::ApplyBlending() const {
     if (independant_blend.enabled) {
-        for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
-            ApplyTargetBlending(i,
-                                independant_blend.enabled != cur_state.independant_blend.enabled);
+        const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
+        for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
+            ApplyTargetBlending(target, force);
         }
     } else {
         ApplyGlobalBlending();
     }
-    if (blend_color.red != cur_state.blend_color.red ||
-        blend_color.green != cur_state.blend_color.green ||
-        blend_color.blue != cur_state.blend_color.blue ||
-        blend_color.alpha != cur_state.blend_color.alpha) {
+    cur_state.independant_blend.enabled = independant_blend.enabled;
+
+    if (UpdateTie(
+            std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
+                     cur_state.blend_color.blue, cur_state.blend_color.alpha),
+            std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
         glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
     }
 }
 
 void OpenGLState::ApplyLogicOp() const {
-    if (logic_op.enabled != cur_state.logic_op.enabled) {
-        if (logic_op.enabled) {
-            glEnable(GL_COLOR_LOGIC_OP);
-        } else {
-            glDisable(GL_COLOR_LOGIC_OP);
-        }
-    }
+    Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
 
-    if (logic_op.operation != cur_state.logic_op.operation) {
+    if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
         glLogicOp(logic_op.operation);
     }
 }
 
 void OpenGLState::ApplyPolygonOffset() const {
-    const bool fill_enable_changed =
-        polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
-    const bool line_enable_changed =
-        polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
-    const bool point_enable_changed =
-        polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
-    const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
-    const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
-    const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
-
-    if (fill_enable_changed) {
-        if (polygon_offset.fill_enable) {
-            glEnable(GL_POLYGON_OFFSET_FILL);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_FILL);
-        }
-    }
-
-    if (line_enable_changed) {
-        if (polygon_offset.line_enable) {
-            glEnable(GL_POLYGON_OFFSET_LINE);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_LINE);
-        }
-    }
-
-    if (point_enable_changed) {
-        if (polygon_offset.point_enable) {
-            glEnable(GL_POLYGON_OFFSET_POINT);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_POINT);
-        }
-    }
-
-    if (factor_changed || units_changed || clamp_changed) {
+    Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
+           polygon_offset.fill_enable);
+    Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
+           polygon_offset.line_enable);
+    Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
+           polygon_offset.point_enable);
+
+    if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
+                           cur_state.polygon_offset.clamp),
+                  std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
         if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
             glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
         } else {
-            glPolygonOffset(polygon_offset.factor, polygon_offset.units);
             UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
                                  "Unimplemented Depth polygon offset clamp.");
+            glPolygonOffset(polygon_offset.factor, polygon_offset.units);
         }
     }
 }
@@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const {
     bool has_delta{};
     std::size_t first{};
     std::size_t last{};
-    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
+    std::array<GLuint, Maxwell::NumTextureSamplers> textures;
 
     for (std::size_t i = 0; i < std::size(texture_units); ++i) {
         const auto& texture_unit = texture_units[i];
-        const auto& cur_state_texture_unit = cur_state.texture_units[i];
+        auto& cur_state_texture_unit = cur_state.texture_units[i];
         textures[i] = texture_unit.texture;
-
-        if (textures[i] != cur_state_texture_unit.texture) {
-            if (!has_delta) {
-                first = i;
-                has_delta = true;
-            }
-            last = i;
+        if (cur_state_texture_unit.texture == textures[i])
+            continue;
+        cur_state_texture_unit.texture = textures[i];
+        if (!has_delta) {
+            first = i;
+            has_delta = true;
         }
+        last = i;
     }
-
     if (has_delta) {
         glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
                        textures.data() + first);
@@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const {
     bool has_delta{};
     std::size_t first{};
     std::size_t last{};
-    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
+    std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
+
     for (std::size_t i = 0; i < std::size(samplers); ++i) {
         samplers[i] = texture_units[i].sampler;
+        if (cur_state.texture_units[i].sampler == texture_units[i].sampler)
+            continue; // unchanged, but may lie inside the first..last range bound below
+        cur_state.texture_units[i].sampler = texture_units[i].sampler;
-        if (samplers[i] != cur_state.texture_units[i].sampler) {
-            if (!has_delta) {
-                first = i;
-                has_delta = true;
-            }
-            last = i;
+        if (!has_delta) {
+            first = i;
+            has_delta = true;
         }
+        last = i;
     }
     if (has_delta) {
         glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
@@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const {
     }
 }
 
-void OpenGLState::ApplyFramebufferState() const {
-    if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
-        glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
-    }
-    if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
-        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
-    }
-}
-
-void OpenGLState::ApplyVertexArrayState() const {
-    if (draw.vertex_array != cur_state.draw.vertex_array) {
-        glBindVertexArray(draw.vertex_array);
-    }
-}
-
-void OpenGLState::ApplyDepthClamp() const {
-    if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
-        depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
-        return;
-    }
-    UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
-                         "Unimplemented Depth Clamp Separation!");
-
-    if (depth_clamp.far_plane || depth_clamp.near_plane) {
-        glEnable(GL_DEPTH_CLAMP);
-    } else {
-        glDisable(GL_DEPTH_CLAMP);
-    }
-}
-
 void OpenGLState::Apply() const {
     ApplyFramebufferState();
     ApplyVertexArrayState();
-
-    // Shader program
-    if (draw.shader_program != cur_state.draw.shader_program) {
-        glUseProgram(draw.shader_program);
-    }
-
-    // Program pipeline
-    if (draw.program_pipeline != cur_state.draw.program_pipeline) {
-        glBindProgramPipeline(draw.program_pipeline);
-    }
-    // Clip distance
-    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
-        if (clip_distance[i] != cur_state.clip_distance[i]) {
-            if (clip_distance[i]) {
-                glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
-            } else {
-                glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
-            }
-        }
-    }
-    // Point
-    if (point.size != cur_state.point.size) {
-        glPointSize(point.size);
-    }
-    if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
-        glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
-                     fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
-    }
-    if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
-        if (multisample_control.alpha_to_coverage) {
-            glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
-        } else {
-            glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
-        }
-    }
-    if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
-        if (multisample_control.alpha_to_one) {
-            glEnable(GL_SAMPLE_ALPHA_TO_ONE);
-        } else {
-            glDisable(GL_SAMPLE_ALPHA_TO_ONE);
-        }
-    }
+    ApplyShaderProgram();
+    ApplyProgramPipeline();
+    ApplyClipDistances();
+    ApplyPointSize();
+    ApplyFragmentColorClamp();
+    ApplyMultisample();
     ApplyDepthClamp();
     ApplyColorMask();
     ApplyViewport();
@@ -574,7 +531,28 @@ void OpenGLState::Apply() const {
     ApplyTextures();
     ApplySamplers();
     ApplyPolygonOffset();
-    cur_state = *this;
+}
+
+void OpenGLState::EmulateViewportWithScissor() {
+    auto& current = viewports[0];
+    if (current.scissor.enabled) {
+        const GLint left = std::max(current.x, current.scissor.x);
+        const GLint right =
+            std::max(current.x + current.width, current.scissor.x + current.scissor.width);
+        const GLint bottom = std::max(current.y, current.scissor.y);
+        const GLint top =
+            std::max(current.y + current.height, current.scissor.y + current.scissor.height);
+        current.scissor.x = std::max(left, 0);
+        current.scissor.y = std::max(bottom, 0);
+        current.scissor.width = std::max(right - left, 0);
+        current.scissor.height = std::max(top - bottom, 0);
+    } else {
+        current.scissor.enabled = true;
+        current.scissor.x = current.x;
+        current.scissor.y = current.y;
+        current.scissor.width = current.width;
+        current.scissor.height = current.height;
+    }
 }
 
 OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 9e1eda5b1..41418a7b8 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -54,10 +54,6 @@ public:
     } depth_clamp; // GL_DEPTH_CLAMP
 
     struct {
-        bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
-    } geometry_shaders;
-
-    struct {
         bool enabled;      // GL_CULL_FACE
         GLenum mode;       // GL_CULL_FACE_MODE
         GLenum front_face; // GL_FRONT_FACE
@@ -184,34 +180,26 @@ public:
     static OpenGLState GetCurState() {
         return cur_state;
     }
+
     static bool GetsRGBUsed() {
         return s_rgb_used;
     }
+
     static void ClearsRGBUsed() {
         s_rgb_used = false;
     }
+
     /// Apply this state as the current OpenGL state
     void Apply() const;
-    /// Apply only the state affecting the framebuffer
+
     void ApplyFramebufferState() const;
-    /// Apply only the state affecting the vertex array
     void ApplyVertexArrayState() const;
-    /// Set the initial OpenGL state
-    static void ApplyDefaultState();
-    /// Resets any references to the given resource
-    OpenGLState& UnbindTexture(GLuint handle);
-    OpenGLState& ResetSampler(GLuint handle);
-    OpenGLState& ResetProgram(GLuint handle);
-    OpenGLState& ResetPipeline(GLuint handle);
-    OpenGLState& ResetVertexArray(GLuint handle);
-    OpenGLState& ResetFramebuffer(GLuint handle);
-    void EmulateViewportWithScissor();
-
-private:
-    static OpenGLState cur_state;
-    // Workaround for sRGB problems caused by
-    // QT not supporting srgb output
-    static bool s_rgb_used;
+    void ApplyShaderProgram() const;
+    void ApplyProgramPipeline() const;
+    void ApplyClipDistances() const;
+    void ApplyPointSize() const;
+    void ApplyFragmentColorClamp() const;
+    void ApplyMultisample() const;
     void ApplySRgb() const;
     void ApplyCulling() const;
     void ApplyColorMask() const;
@@ -227,6 +215,26 @@ private:
     void ApplySamplers() const;
     void ApplyDepthClamp() const;
     void ApplyPolygonOffset() const;
+
+    /// Set the initial OpenGL state
+    static void ApplyDefaultState();
+
+    /// Resets any references to the given resource
+    OpenGLState& UnbindTexture(GLuint handle);
+    OpenGLState& ResetSampler(GLuint handle);
+    OpenGLState& ResetProgram(GLuint handle);
+    OpenGLState& ResetPipeline(GLuint handle);
+    OpenGLState& ResetVertexArray(GLuint handle);
+    OpenGLState& ResetFramebuffer(GLuint handle);
+
+    /// Viewport does not affect glClearBuffer so emulate viewport using scissor test
+    void EmulateViewportWithScissor();
+
+private:
+    static OpenGLState cur_state;
+
+    // Workaround for sRGB problems caused by Qt not supporting sRGB output
+    static bool s_rgb_used;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b97576309..d69cba9c3 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,7 +5,6 @@
 #include <algorithm>
 #include <cstddef>
 #include <cstdlib>
-#include <cstring>
 #include <memory>
 #include <glad/glad.h>
 #include "common/assert.h"
@@ -164,8 +163,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
         // Reset the screen info's display texture to its own permanent texture
         screen_info.display_texture = screen_info.texture.resource.handle;
 
-        Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
-                                             Memory::FlushMode::Flush);
+        rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
 
         constexpr u32 linear_bpp = 4;
         VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
@@ -267,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() {
     }
     // Initialize sRGB Usage
     OpenGLState::ClearsRGBUsed();
-    rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
+    rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
 }
 
 void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 4a33a6c84..02a9f5ecb 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -10,6 +10,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "core/memory.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -17,6 +18,11 @@
 
 namespace Vulkan {
 
+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
+                                     std::size_t alignment, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+      alignment{alignment} {}
+
 VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
                              VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
                              VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
@@ -34,19 +40,20 @@ VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
 
 VKBufferCache::~VKBufferCache() = default;
 
-u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
-                                bool cache) {
+u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
     const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT(cpu_addr);
+    ASSERT_MSG(cpu_addr, "Invalid GPU address");
 
     // Cache management is a big overhead, so only cache entries with a given size.
     // TODO: Figure out which size is the best for given games.
     cache &= size >= 2048;
 
+    const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
     if (cache) {
-        if (auto entry = TryGet(*cpu_addr); entry) {
-            if (entry->size >= size && entry->alignment == alignment) {
-                return entry->offset;
+        auto entry = TryGet(host_ptr);
+        if (entry) {
+            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
+                return entry->GetOffset();
             }
             Unregister(entry);
         }
@@ -55,17 +62,17 @@ u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64
     AlignBuffer(alignment);
     const u64 uploaded_offset = buffer_offset;
 
-    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+    if (!host_ptr) {
+        return uploaded_offset;
+    }
 
+    std::memcpy(buffer_ptr, host_ptr, size);
     buffer_ptr += size;
     buffer_offset += size;
 
     if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>();
-        entry->offset = uploaded_offset;
-        entry->size = size;
-        entry->alignment = alignment;
-        entry->addr = *cpu_addr;
+        auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
+                                                         alignment, host_ptr);
         Register(entry);
     }
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index d8e916f31..08b786aad 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -24,22 +24,39 @@ class VKFence;
 class VKMemoryManager;
 class VKStreamBuffer;
 
-struct CachedBufferEntry final : public RasterizerCacheObject {
-    VAddr GetAddr() const override {
-        return addr;
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
+                               u8* host_ptr);
+
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
         return size;
     }
 
+    std::size_t GetSize() const {
+        return size;
+    }
+
+    u64 GetOffset() const {
+        return offset;
+    }
+
+    std::size_t GetAlignment() const {
+        return alignment;
+    }
+
     // We do not have to flush this cache as things in it are never modified by us.
     void Flush() override {}
 
-    VAddr addr;
-    std::size_t size;
-    u64 offset;
-    std::size_t alignment;
+private:
+    VAddr cpu_addr{};
+    std::size_t size{};
+    u64 offset{};
+    std::size_t alignment{};
 };
 
 class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -51,8 +68,7 @@ public:
 
     /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
     /// allocated.
-    u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
-                     bool cache = true);
+    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
 
     /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
     u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index a1e117443..13c46e5b8 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -21,7 +21,7 @@ public:
     CommandBufferPool(const VKDevice& device)
         : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
 
-    void Allocate(std::size_t begin, std::size_t end) {
+    void Allocate(std::size_t begin, std::size_t end) override {
         const auto dev = device.GetLogical();
         const auto& dld = device.GetDispatchLoader();
         const u32 graphics_family = device.GetGraphicsFamily();
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index 5bfe4cead..08ee86fa6 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -97,7 +97,7 @@ private:
 class VKFenceWatch final : public VKResource {
 public:
     explicit VKFenceWatch();
-    ~VKFenceWatch();
+    ~VKFenceWatch() override;
 
     /// Waits for the fence to be released.
     void Wait();
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
new file mode 100644
index 000000000..08279e562
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -0,0 +1,210 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/frontend/framebuffer_layout.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+
+namespace Vulkan {
+
+namespace {
+vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
+    if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
+        return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
+    }
+    const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
+        return format.format == vk::Format::eB8G8R8A8Unorm &&
+               format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
+    });
+    return found != formats.end() ? *found : formats[0];
+}
+
+vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) {
+    // Mailbox doesn't lock the application like fifo (vsync), prefer it
+    const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) {
+        return mode == vk::PresentModeKHR::eMailbox;
+    });
+    return found != modes.end() ? *found : vk::PresentModeKHR::eFifo;
+}
+
+vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
+                              u32 height) {
+    constexpr auto undefined_size{std::numeric_limits<u32>::max()};
+    if (capabilities.currentExtent.width != undefined_size) {
+        return capabilities.currentExtent;
+    }
+    vk::Extent2D extent = {width, height};
+    extent.width = std::max(capabilities.minImageExtent.width,
+                            std::min(capabilities.maxImageExtent.width, extent.width));
+    extent.height = std::max(capabilities.minImageExtent.height,
+                             std::min(capabilities.maxImageExtent.height, extent.height));
+    return extent;
+}
+} // namespace
+
+VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
+    : surface{surface}, device{device} {}
+
+VKSwapchain::~VKSwapchain() = default;
+
+void VKSwapchain::Create(u32 width, u32 height) {
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    const auto physical_device = device.GetPhysical();
+
+    const vk::SurfaceCapabilitiesKHR capabilities{
+        physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
+    if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
+        return;
+    }
+
+    dev.waitIdle(dld);
+    Destroy();
+
+    CreateSwapchain(capabilities, width, height);
+    CreateSemaphores();
+    CreateImageViews();
+
+    fences.resize(image_count, nullptr);
+}
+
+void VKSwapchain::AcquireNextImage() {
+    const auto dev{device.GetLogical()};
+    const auto& dld{device.GetDispatchLoader()};
+    dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
+                            *present_semaphores[frame_index], {}, &image_index, dld);
+
+    if (auto& fence = fences[image_index]; fence) {
+        fence->Wait();
+        fence->Release();
+        fence = nullptr;
+    }
+}
+
+bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
+    const vk::Semaphore present_semaphore{*present_semaphores[frame_index]};
+    const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore};
+    const u32 wait_semaphore_count{render_semaphore ? 2U : 1U};
+    const auto& dld{device.GetDispatchLoader()};
+    const auto present_queue{device.GetPresentQueue()};
+    bool recreated = false;
+
+    const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1,
+                                          &swapchain.get(), &image_index, {});
+    switch (const auto result = present_queue.presentKHR(&present_info, dld); result) {
+    case vk::Result::eSuccess:
+        break;
+    case vk::Result::eErrorOutOfDateKHR:
+        if (current_width > 0 && current_height > 0) {
+            Create(current_width, current_height);
+            recreated = true;
+        }
+        break;
+    default:
+        LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!",
+                     vk::to_string(result));
+        UNREACHABLE();
+    }
+
+    ASSERT(fences[image_index] == nullptr);
+    fences[image_index] = &fence;
+    frame_index = (frame_index + 1) % image_count;
+    return recreated;
+}
+
+bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
+    // TODO(Rodrigo): Handle framebuffer pixel format changes
+    return framebuffer.width != current_width || framebuffer.height != current_height;
+}
+
+void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
+                                  u32 height) {
+    const auto dev{device.GetLogical()};
+    const auto& dld{device.GetDispatchLoader()};
+    const auto physical_device{device.GetPhysical()};
+
+    const std::vector<vk::SurfaceFormatKHR> formats{
+        physical_device.getSurfaceFormatsKHR(surface, dld)};
+
+    const std::vector<vk::PresentModeKHR> present_modes{
+        physical_device.getSurfacePresentModesKHR(surface, dld)};
+
+    const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
+    const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
+    extent = ChooseSwapExtent(capabilities, width, height);
+
+    current_width = extent.width;
+    current_height = extent.height;
+
+    u32 requested_image_count{capabilities.minImageCount + 1};
+    if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
+        requested_image_count = capabilities.maxImageCount;
+    }
+
+    vk::SwapchainCreateInfoKHR swapchain_ci(
+        {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace,
+        extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {},
+        capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false,
+        {});
+
+    const u32 graphics_family{device.GetGraphicsFamily()};
+    const u32 present_family{device.GetPresentFamily()};
+    const std::array<u32, 2> queue_indices{graphics_family, present_family};
+    if (graphics_family != present_family) {
+        swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent;
+        swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
+        swapchain_ci.pQueueFamilyIndices = queue_indices.data();
+    } else {
+        swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
+    }
+
+    swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
+
+    images = dev.getSwapchainImagesKHR(*swapchain, dld);
+    image_count = static_cast<u32>(images.size());
+    image_format = surface_format.format;
+}
+
+void VKSwapchain::CreateSemaphores() {
+    const auto dev{device.GetLogical()};
+    const auto& dld{device.GetDispatchLoader()};
+
+    present_semaphores.resize(image_count);
+    for (std::size_t i = 0; i < image_count; i++) {
+        present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld);
+    }
+}
+
+void VKSwapchain::CreateImageViews() {
+    const auto dev{device.GetLogical()};
+    const auto& dld{device.GetDispatchLoader()};
+
+    image_views.resize(image_count);
+    for (std::size_t i = 0; i < image_count; i++) {
+        const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D,
+                                                    image_format, {},
+                                                    {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1});
+        image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld);
+    }
+}
+
+void VKSwapchain::Destroy() {
+    frame_index = 0;
+    present_semaphores.clear();
+    framebuffers.clear();
+    image_views.clear();
+    swapchain.reset();
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
new file mode 100644
index 000000000..2ad84f185
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -0,0 +1,92 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Layout {
+struct FramebufferLayout;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+
+class VKSwapchain {
+public:
+    explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device);
+    ~VKSwapchain();
+
+    /// Creates (or recreates) the swapchain with a given size.
+    void Create(u32 width, u32 height);
+
+    /// Acquires the next image in the swapchain, waits as needed.
+    void AcquireNextImage();
+
+    /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be
+    /// recreated. Takes responsibility for the ownership of the fence.
+    bool Present(vk::Semaphore render_semaphore, VKFence& fence);
+
+    /// Returns true when the framebuffer layout has changed.
+    bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
+
+    const vk::Extent2D& GetSize() const {
+        return extent;
+    }
+
+    u32 GetImageCount() const {
+        return image_count;
+    }
+
+    u32 GetImageIndex() const {
+        return image_index;
+    }
+
+    vk::Image GetImageIndex(u32 index) const {
+        return images[index];
+    }
+
+    vk::ImageView GetImageViewIndex(u32 index) const {
+        return *image_views[index];
+    }
+
+    vk::Format GetImageFormat() const {
+        return image_format;
+    }
+
+private:
+    void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
+    void CreateSemaphores();
+    void CreateImageViews();
+
+    void Destroy();
+
+    const vk::SurfaceKHR surface;
+    const VKDevice& device;
+
+    UniqueSwapchainKHR swapchain;
+
+    u32 image_count{};
+    std::vector<vk::Image> images;
+    std::vector<UniqueImageView> image_views;
+    std::vector<UniqueFramebuffer> framebuffers;
+    std::vector<VKFence*> fences;
+    std::vector<UniqueSemaphore> present_semaphores;
+
+    u32 image_index{};
+    u32 frame_index{};
+
+    vk::Format image_format{};
+    vk::Extent2D extent{};
+
+    u32 current_width{};
+    u32 current_height{};
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a99ae19bf..a775b402b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -7,7 +7,9 @@
 #include <fmt/format.h>
 
 #include "common/assert.h"
+#include "common/bit_field.h"
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
 
     switch (opcode->get().GetId()) {
     case OpCode::Id::TEX: {
-        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
-
         if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
             LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
         }
 
         const TextureType texture_type{instr.tex.texture_type};
         const bool is_array = instr.tex.array != 0;
+        const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
         const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
         const auto process_mode = instr.tex.GetTextureProcessMode();
         WriteTexInstructionFloat(
-            bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
+            bb, instr,
+            GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
         break;
     }
     case OpCode::Id::TEXS: {
@@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     }
     case OpCode::Id::TLD4: {
         ASSERT(instr.tld4.array == 0);
-        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                              "NDV is not implemented");
         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
@@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const auto texture_type = instr.tld4.texture_type.Value();
         const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
         const bool is_array = instr.tld4.array != 0;
-        WriteTexInstructionFloat(bb, instr,
-                                 GetTld4Code(instr, texture_type, depth_compare, is_array));
+        const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+        WriteTexInstructionFloat(
+            bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
         break;
     }
     case OpCode::Id::TLD4S: {
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         Node4 values;
         for (u32 element = 0; element < values.size(); ++element) {
             auto coords_copy = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
+            MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
         }
 
@@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                 if (!instr.txq.IsComponentEnabled(element)) {
                     continue;
                 }
-                MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
+                MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
                 const Node value =
                     Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
                 SetTemporal(bb, indexer++, value);
@@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
 
         for (u32 element = 0; element < 2; ++element) {
             auto params = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
+            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
             SetTemporal(bb, element, value);
         }
@@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
 
 Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                                TextureProcessMode process_mode, std::vector<Node> coords,
-                               Node array, Node depth_compare, u32 bias_offset) {
+                               Node array, Node depth_compare, u32 bias_offset,
+                               std::vector<Node> aoffi) {
     const bool is_array = array;
     const bool is_shadow = depth_compare;
 
@@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto copy_coords = coords;
-        MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
+        MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
         values[element] = Operation(read_method, meta, std::move(copy_coords));
     }
 
@@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
 }
 
 Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
-                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
-    const bool lod_bias_enabled =
-        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
+                           TextureProcessMode process_mode, bool depth_compare, bool is_array,
+                           bool is_aoffi) {
+    const bool lod_bias_enabled{
+        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
+
+    u64 parameter_register = instr.gpr20.Value();
+    if (lod_bias_enabled) {
+        ++parameter_register;
+    }
 
     const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
         texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
 
     const Node array = is_array ? GetRegister(array_register) : nullptr;
 
+    std::vector<Node> aoffi;
+    if (is_aoffi) {
+        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
+    }
+
     Node dc{};
     if (depth_compare) {
         // Depth is always stored in the register signaled by gpr20 or in the next register if lod
         // or bias are used
-        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-        dc = GetRegister(depth_register);
+        dc = GetRegister(parameter_register++);
     }
 
-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
 }
 
 Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
         dc = GetRegister(depth_register);
     }
 
-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
 }
 
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
-                            bool is_array) {
+                            bool is_array, bool is_aoffi) {
     const std::size_t coord_count = GetCoordCount(texture_type);
     const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
     const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     const u64 coord_register = array_register + (is_array ? 1 : 0);
 
     std::vector<Node> coords;
-    for (size_t i = 0; i < coord_count; ++i)
+    for (std::size_t i = 0; i < coord_count; ++i) {
         coords.push_back(GetRegister(coord_register + i));
+    }
+
+    u64 parameter_register = instr.gpr20.Value();
+    std::vector<Node> aoffi;
+    if (is_aoffi) {
+        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+    }
+
+    Node dc{};
+    if (depth_compare) {
+        dc = GetRegister(parameter_register++);
+    }
 
     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
+        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
     }
 
@@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
     }
     return values;
@@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
     return {coord_count, total_coord_count};
 }
 
-} // namespace VideoCommon::Shader
-\ No newline at end of file
+std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
+                                                bool is_tld4) {
+    const auto [coord_offsets, size, wrap_value,
+                diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
+        if (is_tld4) {
+            return {{0, 8, 16}, 6, 32, 64};
+        } else {
+            return {{0, 4, 8}, 4, 8, 16};
+        }
+    }();
+    const u32 mask = (1U << size) - 1;
+
+    std::vector<Node> aoffi;
+    aoffi.reserve(coord_count);
+
+    const auto aoffi_immediate{
+        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
+    if (!aoffi_immediate) {
+        // Variable access, not supported on AMD.
+        LOG_WARNING(HW_GPU,
+                    "AOFFI constant folding failed, some hardware might have graphical issues");
+        for (std::size_t coord = 0; coord < coord_count; ++coord) {
+            const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
+            const Node condition =
+                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
+            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
+            aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
+        }
+        return aoffi;
+    }
+
+    for (std::size_t coord = 0; coord < coord_count; ++coord) {
+        s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
+        if (value >= wrap_value) {
+            value -= diff_value;
+        }
+        aoffi.push_back(Immediate(value));
+    }
+    return aoffi;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 5bc3a3900..4888998d3 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <cstring>
 #include <map>
+#include <optional>
 #include <set>
 #include <string>
 #include <tuple>
@@ -290,6 +291,7 @@ struct MetaTexture {
     const Sampler& sampler;
     Node array{};
     Node depth_compare{};
+    std::vector<Node> aoffi;
     Node bias{};
     Node lod{};
     Node component{};
@@ -741,14 +743,14 @@ private:
 
     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
-                     bool is_array);
+                     bool is_array, bool is_aoffi);
 
     Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                       Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                       bool is_array);
 
     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                      bool depth_compare, bool is_array);
+                      bool depth_compare, bool is_array, bool is_aoffi);
 
     Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                       bool is_array);
@@ -757,9 +759,11 @@ private:
         Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
         bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
 
+    std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
+
     Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                          Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
-                         Node array, Node depth_compare, u32 bias_offset);
+                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
 
     Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                          u64 byte_height);
@@ -773,6 +777,8 @@ private:
 
     Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
 
+    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+
     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
 
     template <typename... T>
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 33b071747..4505667ff 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -6,6 +6,7 @@
 #include <utility>
 #include <variant>
 
+#include "common/common_types.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace VideoCommon::Shader {
@@ -14,7 +15,7 @@ namespace {
 std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                    OperationCode operation_code) {
     for (; cursor >= 0; --cursor) {
-        const Node node = code[cursor];
+        const Node node = code.at(cursor);
         if (const auto operation = std::get_if<OperationNode>(node)) {
             if (operation->GetCode() == operation_code)
                 return {node, cursor};
@@ -64,6 +65,27 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
     return nullptr;
 }
 
+// Tracks the register node `tracked` backwards through `code`, starting right before `cursor`,
+// and returns the value of the immediate found for it. Returns an empty optional when `tracked`
+// is not a register node, when tracking fails, or when the tracked node is not an immediate.
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+    // Only register nodes can be tracked; a null or non-register node yields no value
+    const auto gpr = std::get_if<GprNode>(tracked);
+    if (!gpr) {
+        return {};
+    }
+    // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register
+    // that it uses as operand
+    const auto [found, found_cursor] = TrackRegister(gpr, code, cursor - 1);
+    if (!found) {
+        return {};
+    }
+    if (const auto immediate = std::get_if<ImmediateNode>(found)) {
+        return immediate->GetValue();
+    }
+    return {};
+}
+
 std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
                                              s64 cursor) {
     for (; cursor >= 0; --cursor) {
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index cad7340f5..995d0e068 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
 #include <cstring>
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "core/memory.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/decoders.h"
 #include "video_core/textures/texture.h"
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
     }
 }
 
-void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
                       u32 block_depth, u32 width_spacing) {
     CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
                      (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
-                     bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true,
-                     block_height, block_depth, width_spacing);
+                     bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
+                     width_spacing);
 }
 
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
-                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
-                                 u32 block_height, u32 block_depth, u32 width_spacing) {
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+                                 u32 width, u32 height, u32 depth, u32 block_height,
+                                 u32 block_depth, u32 width_spacing) {
     std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
     UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
                      width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
 }
 
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                    u32 block_height) {
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
     const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
                                   gob_size_x};
     for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
             const u32 gob_address =
                 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
             const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
-            const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
-            const VAddr dest_addr = swizzled_data + swizzled_offset;
+            u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
+            u8* dest_addr = swizzled_data + swizzled_offset;
 
-            Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel);
+            std::memcpy(dest_addr, source_line, bytes_per_pixel);
         }
     }
 }
 
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                      u32 block_height, u32 offset_x, u32 offset_y) {
+                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                      u32 offset_x, u32 offset_y) {
     for (u32 line = 0; line < subrect_height; ++line) {
         const u32 y2 = line + offset_y;
         const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
             const u32 x2 = (x + offset_x) * bytes_per_pixel;
             const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
             const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
-            const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
-            const VAddr source_addr = swizzled_data + swizzled_offset;
+            u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
+            u8* source_addr = swizzled_data + swizzled_offset;
 
-            Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel);
+            std::memcpy(dest_line, source_addr, bytes_per_pixel);
         }
     }
 }
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 65df86890..e078fa274 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -17,14 +17,14 @@ inline std::size_t GetGOBSize() {
 }
 
 /// Unswizzles a swizzled texture without changing its format.
-void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                       u32 block_height = TICEntry::DefaultBlockHeight,
                       u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
 
 /// Unswizzles a swizzled texture without changing its format.
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
-                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+                                 u32 width, u32 height, u32 depth,
                                  u32 block_height = TICEntry::DefaultBlockHeight,
                                  u32 block_depth = TICEntry::DefaultBlockHeight,
                                  u32 width_spacing = 0);
@@ -44,12 +44,11 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
 
 /// Copies an untiled subrectangle into a tiled surface.
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                    u32 block_height);
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
 
 /// Copies a tiled subrectangle into a linear surface.
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                      u32 block_height, u32 offset_x, u32 offset_y);
+                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                      u32 offset_x, u32 offset_y);
 
 } // namespace Tegra::Texture
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index 40da1a4e2..dc149d2ed 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -24,7 +24,7 @@ constexpr u32 TIMEOUT_SECONDS = 30;
 struct Client::Impl {
     Impl(std::string host, std::string username, std::string token)
         : host{std::move(host)}, username{std::move(username)}, token{std::move(token)} {
-        std::lock_guard<std::mutex> lock(jwt_cache.mutex);
+        std::lock_guard lock{jwt_cache.mutex};
         if (this->username == jwt_cache.username && this->token == jwt_cache.token) {
             jwt = jwt_cache.jwt;
         }
@@ -151,7 +151,7 @@ struct Client::Impl {
         if (result.result_code != Common::WebResult::Code::Success) {
             LOG_ERROR(WebService, "UpdateJWT failed");
         } else {
-            std::lock_guard<std::mutex> lock(jwt_cache.mutex);
+            std::lock_guard lock{jwt_cache.mutex};
             jwt_cache.username = username;
             jwt_cache.token = token;
             jwt_cache.jwt = jwt = result.returned_data;
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
index 3a0824547..743b24d76 100644
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -59,10 +59,7 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
 
     scroll_area = new QScrollArea;
 
-    buttons = new QDialogButtonBox;
-    buttons->addButton(tr("Cancel"), QDialogButtonBox::RejectRole);
-    buttons->addButton(tr("OK"), QDialogButtonBox::AcceptRole);
-
+    buttons = new QDialogButtonBox(QDialogButtonBox::Cancel | QDialogButtonBox::Ok);
     connect(buttons, &QDialogButtonBox::accepted, this, &QtProfileSelectionDialog::accept);
     connect(buttons, &QDialogButtonBox::rejected, this, &QtProfileSelectionDialog::reject);
 
@@ -164,6 +161,6 @@ void QtProfileSelector::SelectProfile(
 
 void QtProfileSelector::MainWindowFinishedSelection(std::optional<Service::Account::UUID> uuid) {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     callback(uuid);
 }
diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp
index 8a26fdff1..f3eb29b25 100644
--- a/src/yuzu/applets/software_keyboard.cpp
+++ b/src/yuzu/applets/software_keyboard.cpp
@@ -75,13 +75,13 @@ QtSoftwareKeyboardDialog::QtSoftwareKeyboardDialog(
         length_label->setText(QStringLiteral("%1/%2").arg(text.size()).arg(parameters.max_length));
     });
 
-    buttons = new QDialogButtonBox;
-    buttons->addButton(tr("Cancel"), QDialogButtonBox::RejectRole);
-    buttons->addButton(parameters.submit_text.empty()
-                           ? tr("OK")
-                           : QString::fromStdU16String(parameters.submit_text),
-                       QDialogButtonBox::AcceptRole);
-
+    buttons = new QDialogButtonBox(QDialogButtonBox::Cancel);
+    if (parameters.submit_text.empty()) {
+        buttons->addButton(QDialogButtonBox::Ok);
+    } else {
+        buttons->addButton(QString::fromStdU16String(parameters.submit_text),
+                           QDialogButtonBox::AcceptRole);
+    }
     connect(buttons, &QDialogButtonBox::accepted, this, &QtSoftwareKeyboardDialog::accept);
     connect(buttons, &QDialogButtonBox::rejected, this, &QtSoftwareKeyboardDialog::reject);
     layout->addWidget(header_label);
@@ -141,12 +141,12 @@ void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message,
 
 void QtSoftwareKeyboard::MainWindowFinishedText(std::optional<std::u16string> text) {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     text_output(text);
 }
 
 void QtSoftwareKeyboard::MainWindowFinishedCheckDialog() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     finished_check();
 }
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
index 979b9ec14..ac80b2fa2 100644
--- a/src/yuzu/applets/web_browser.cpp
+++ b/src/yuzu/applets/web_browser.cpp
@@ -104,12 +104,12 @@ void QtWebBrowser::OpenPage(std::string_view url, std::function<void()> unpack_r
 
 void QtWebBrowser::MainWindowUnpackRomFS() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     unpack_romfs_callback();
 }
 
 void QtWebBrowser::MainWindowFinishedBrowsing() {
     // Acquire the HLE mutex
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+    std::lock_guard lock{HLE::g_hle_lock};
     finished_callback();
 }
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index d2c97b1f8..7438fbc0a 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -24,8 +24,6 @@ void EmuThread::run() {
 
     MicroProfileOnThreadCreate("EmuThread");
 
-    stop_run = false;
-
     emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
 
     Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
@@ -40,7 +38,7 @@ void EmuThread::run() {
         render_window->DoneCurrent();
     }
 
-    // holds whether the cpu was running during the last iteration,
+    // Holds whether the cpu was running during the last iteration,
     // so that the DebugModeLeft signal can be emitted before the
     // next execution step
     bool was_active = false;
@@ -69,7 +67,7 @@ void EmuThread::run() {
 
             was_active = false;
         } else {
-            std::unique_lock<std::mutex> lock(running_mutex);
+            std::unique_lock lock{running_mutex};
             running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
         }
     }
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 7226e690e..3183621bc 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -53,7 +53,7 @@ public:
      * @note This function is thread-safe
      */
     void SetRunning(bool running) {
-        std::unique_lock<std::mutex> lock(running_mutex);
+        std::unique_lock lock{running_mutex};
         this->running = running;
         lock.unlock();
         running_cv.notify_all();
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 2e8ebfc12..802db3945 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -434,7 +434,6 @@ void Config::ReadValues() {
 
     qt_config->beginGroup("System");
     Settings::values.use_docked_mode = ReadSetting("use_docked_mode", false).toBool();
-    Settings::values.enable_nfc = ReadSetting("enable_nfc", true).toBool();
 
     Settings::values.current_user =
         std::clamp<int>(ReadSetting("current_user", 0).toInt(), 0, Service::Account::MAX_USERS - 1);
@@ -691,7 +690,6 @@ void Config::SaveValues() {
 
     qt_config->beginGroup("System");
     WriteSetting("use_docked_mode", Settings::values.use_docked_mode, false);
-    WriteSetting("enable_nfc", Settings::values.enable_nfc, true);
     WriteSetting("current_user", Settings::values.current_user, 0);
     WriteSetting("language_index", Settings::values.language_index, 1);
 
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 81a60da08..eeb038afb 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -33,7 +33,6 @@ void ConfigureGeneral::setConfiguration() {
     ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
     ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
     ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
-    ui->enable_nfc->setChecked(Settings::values.enable_nfc);
 }
 
 void ConfigureGeneral::applyConfiguration() {
@@ -44,5 +43,4 @@ void ConfigureGeneral::applyConfiguration() {
         ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
 
     Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
-    Settings::values.enable_nfc = ui->enable_nfc->isChecked();
 }
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index 879ef747f..1a5721fe7 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -71,26 +71,6 @@
       </widget>
      </item>
      <item>
-      <widget class="QGroupBox" name="EmulationGroupBox">
-       <property name="title">
-        <string>Emulation</string>
-       </property>
-       <layout class="QHBoxLayout" name="EmulationHorizontalLayout">
-        <item>
-         <layout class="QVBoxLayout" name="EmulationVerticalLayout">
-          <item>
-           <widget class="QCheckBox" name="enable_nfc">
-            <property name="text">
-             <string>Enable NFC</string>
-            </property>
-           </widget>
-          </item>
-         </layout>
-        </item>
-       </layout>
-      </widget>
-     </item>
-     <item>
       <widget class="QGroupBox" name="theme_group_box">
        <property name="title">
         <string>Theme</string>
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 71683da8e..f2d14becf 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -7,6 +7,7 @@
 #include <QDebug>
 #include <QFileDialog>
 #include <QLabel>
+#include <QMessageBox>
 #include <QMouseEvent>
 #include <QPushButton>
 #include <QScrollArea>
@@ -95,50 +96,91 @@ GraphicsSurfaceWidget::GraphicsSurfaceWidget(std::shared_ptr<Tegra::DebugContext
     surface_picker_y_control = new QSpinBox;
     surface_picker_y_control->setRange(0, max_dimension - 1);
 
-    surface_format_control = new QComboBox;
-
+    // clang-format off
     // Color formats sorted by Maxwell texture format index
-    surface_format_control->addItem(tr("None"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("A8R8G8B8"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("Unknown"));
-    surface_format_control->addItem(tr("DXT1"));
-    surface_format_control->addItem(tr("DXT23"));
-    surface_format_control->addItem(tr("DXT45"));
-    surface_format_control->addItem(tr("DXN1"));
-    surface_format_control->addItem(tr("DXN2"));
+    const QStringList surface_formats{
+        tr("None"),
+        QStringLiteral("R32_G32_B32_A32"),
+        QStringLiteral("R32_G32_B32"),
+        QStringLiteral("R16_G16_B16_A16"),
+        QStringLiteral("R32_G32"),
+        QStringLiteral("R32_B24G8"),
+        QStringLiteral("ETC2_RGB"),
+        QStringLiteral("X8B8G8R8"),
+        QStringLiteral("A8R8G8B8"),
+        QStringLiteral("A2B10G10R10"),
+        QStringLiteral("ETC2_RGB_PTA"),
+        QStringLiteral("ETC2_RGBA"),
+        QStringLiteral("R16_G16"),
+        QStringLiteral("G8R24"),
+        QStringLiteral("G24R8"),
+        QStringLiteral("R32"),
+        QStringLiteral("BC6H_SF16"),
+        QStringLiteral("BC6H_UF16"),
+        QStringLiteral("A4B4G4R4"),
+        QStringLiteral("A5B5G5R1"),
+        QStringLiteral("A1B5G5R5"),
+        QStringLiteral("B5G6R5"),
+        QStringLiteral("B6G5R5"),
+        QStringLiteral("BC7U"),
+        QStringLiteral("G8R8"),
+        QStringLiteral("EAC"),
+        QStringLiteral("EACX2"),
+        QStringLiteral("R16"),
+        QStringLiteral("Y8_VIDEO"),
+        QStringLiteral("R8"),
+        QStringLiteral("G4R4"),
+        QStringLiteral("R1"),
+        QStringLiteral("E5B9G9R9_SHAREDEXP"),
+        QStringLiteral("BF10GF11RF11"),
+        QStringLiteral("G8B8G8R8"),
+        QStringLiteral("B8G8R8G8"),
+        QStringLiteral("DXT1"),
+        QStringLiteral("DXT23"),
+        QStringLiteral("DXT45"),
+        QStringLiteral("DXN1"),
+        QStringLiteral("DXN2"),
+        QStringLiteral("Z24S8"),
+        QStringLiteral("X8Z24"),
+        QStringLiteral("S8Z24"),
+        QStringLiteral("X4V4Z24__COV4R4V"),
+        QStringLiteral("X4V4Z24__COV8R8V"),
+        QStringLiteral("V8Z24__COV4R12V"),
+        QStringLiteral("ZF32"),
+        QStringLiteral("ZF32_X24S8"),
+        QStringLiteral("X8Z24_X20V4S8__COV4R4V"),
+        QStringLiteral("X8Z24_X20V4S8__COV8R8V"),
+        QStringLiteral("ZF32_X20V4X8__COV4R4V"),
+        QStringLiteral("ZF32_X20V4X8__COV8R8V"),
+        QStringLiteral("ZF32_X20V4S8__COV4R4V"),
+        QStringLiteral("ZF32_X20V4S8__COV8R8V"),
+        QStringLiteral("X8Z24_X16V8S8__COV4R12V"),
+        QStringLiteral("ZF32_X16V8X8__COV4R12V"),
+        QStringLiteral("ZF32_X16V8S8__COV4R12V"),
+        QStringLiteral("Z16"),
+        QStringLiteral("V8Z24__COV8R24V"),
+        QStringLiteral("X8Z24_X16V8S8__COV8R24V"),
+        QStringLiteral("ZF32_X16V8X8__COV8R24V"),
+        QStringLiteral("ZF32_X16V8S8__COV8R24V"),
+        QStringLiteral("ASTC_2D_4X4"),
+        QStringLiteral("ASTC_2D_5X5"),
+        QStringLiteral("ASTC_2D_6X6"),
+        QStringLiteral("ASTC_2D_8X8"),
+        QStringLiteral("ASTC_2D_10X10"),
+        QStringLiteral("ASTC_2D_12X12"),
+        QStringLiteral("ASTC_2D_5X4"),
+        QStringLiteral("ASTC_2D_6X5"),
+        QStringLiteral("ASTC_2D_8X6"),
+        QStringLiteral("ASTC_2D_10X8"),
+        QStringLiteral("ASTC_2D_12X10"),
+        QStringLiteral("ASTC_2D_8X5"),
+        QStringLiteral("ASTC_2D_10X5"),
+        QStringLiteral("ASTC_2D_10X6"),
+    };
+    // clang-format on
+
+    surface_format_control = new QComboBox;
+    surface_format_control->addItems(surface_formats);
 
     surface_info_label = new QLabel();
     surface_info_label->setWordWrap(true);
@@ -157,22 +199,20 @@ GraphicsSurfaceWidget::GraphicsSurfaceWidget(std::shared_ptr<Tegra::DebugContext
 
     // Connections
     connect(this, &GraphicsSurfaceWidget::Update, this, &GraphicsSurfaceWidget::OnUpdate);
-    connect(surface_source_list,
-            static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
+    connect(surface_source_list, qOverload<int>(&QComboBox::currentIndexChanged), this,
             &GraphicsSurfaceWidget::OnSurfaceSourceChanged);
     connect(surface_address_control, &CSpinBox::ValueChanged, this,
             &GraphicsSurfaceWidget::OnSurfaceAddressChanged);
-    connect(surface_width_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
-            this, &GraphicsSurfaceWidget::OnSurfaceWidthChanged);
-    connect(surface_height_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
-            this, &GraphicsSurfaceWidget::OnSurfaceHeightChanged);
-    connect(surface_format_control,
-            static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
+    connect(surface_width_control, qOverload<int>(&QSpinBox::valueChanged), this,
+            &GraphicsSurfaceWidget::OnSurfaceWidthChanged);
+    connect(surface_height_control, qOverload<int>(&QSpinBox::valueChanged), this,
+            &GraphicsSurfaceWidget::OnSurfaceHeightChanged);
+    connect(surface_format_control, qOverload<int>(&QComboBox::currentIndexChanged), this,
             &GraphicsSurfaceWidget::OnSurfaceFormatChanged);
-    connect(surface_picker_x_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
-            this, &GraphicsSurfaceWidget::OnSurfacePickerXChanged);
-    connect(surface_picker_y_control, static_cast<void (QSpinBox::*)(int)>(&QSpinBox::valueChanged),
-            this, &GraphicsSurfaceWidget::OnSurfacePickerYChanged);
+    connect(surface_picker_x_control, qOverload<int>(&QSpinBox::valueChanged), this,
+            &GraphicsSurfaceWidget::OnSurfacePickerXChanged);
+    connect(surface_picker_y_control, qOverload<int>(&QSpinBox::valueChanged), this,
+            &GraphicsSurfaceWidget::OnSurfacePickerYChanged);
     connect(save_surface, &QPushButton::clicked, this, &GraphicsSurfaceWidget::SaveSurface);
 
     auto main_widget = new QWidget;
@@ -261,7 +301,7 @@ void GraphicsSurfaceWidget::OnSurfaceSourceChanged(int new_value) {
 
 void GraphicsSurfaceWidget::OnSurfaceAddressChanged(qint64 new_value) {
     if (surface_address != new_value) {
-        surface_address = static_cast<Tegra::GPUVAddr>(new_value);
+        surface_address = static_cast<GPUVAddr>(new_value);
 
         surface_source_list->setCurrentIndex(static_cast<int>(Source::Custom));
         emit Update();
@@ -383,13 +423,12 @@ void GraphicsSurfaceWidget::OnUpdate() {
     // TODO: Implement a good way to visualize alpha components!
 
     QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
-    std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
 
     // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
     // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
     auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
-        *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width,
-        surface_height, 1U);
+        gpu.MemoryManager().GetPointer(surface_address), 1, 1,
+        Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U);
 
     auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
                                                       surface_width, surface_height);
@@ -421,40 +460,64 @@ void GraphicsSurfaceWidget::OnUpdate() {
 }
 
+// Asks the user for a destination file and saves the current surface to it, either as a PNG of
+// the displayed pixmap or as the raw surface bytes, depending on the filter chosen in the dialog.
 void GraphicsSurfaceWidget::SaveSurface() {
-    QString png_filter = tr("Portable Network Graphic (*.png)");
-    QString bin_filter = tr("Binary data (*.bin)");
+    const QString png_filter = tr("Portable Network Graphic (*.png)");
+    const QString bin_filter = tr("Binary data (*.bin)");
 
-    QString selectedFilter;
-    QString filename = QFileDialog::getSaveFileName(
+    QString selected_filter;
+    const QString filename = QFileDialog::getSaveFileName(
         this, tr("Save Surface"),
-        QString("texture-0x%1.png").arg(QString::number(surface_address, 16)),
-        QString("%1;;%2").arg(png_filter, bin_filter), &selectedFilter);
+        QStringLiteral("texture-0x%1.png").arg(QString::number(surface_address, 16)),
+        QStringLiteral("%1;;%2").arg(png_filter, bin_filter), &selected_filter);
 
     if (filename.isEmpty()) {
         // If the user canceled the dialog, don't save anything.
         return;
     }
 
-    if (selectedFilter == png_filter) {
-        const QPixmap* pixmap = surface_picture_label->pixmap();
+    if (selected_filter == png_filter) {
+        const QPixmap* const pixmap = surface_picture_label->pixmap();
         ASSERT_MSG(pixmap != nullptr, "No pixmap set");
 
-        QFile file(filename);
-        file.open(QIODevice::WriteOnly);
-        if (pixmap)
-            pixmap->save(&file, "PNG");
-    } else if (selectedFilter == bin_filter) {
+        QFile file{filename};
+        if (!file.open(QIODevice::WriteOnly)) {
+            QMessageBox::warning(this, tr("Error"), tr("Failed to open file '%1'").arg(filename));
+            return;
+        }
+
+        if (!pixmap->save(&file, "PNG")) {
+            QMessageBox::warning(this, tr("Error"),
+                                 tr("Failed to save surface data to file '%1'").arg(filename));
+        }
+    } else if (selected_filter == bin_filter) {
         auto& gpu = Core::System::GetInstance().GPU();
-        std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
+        const std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
+        if (!address) {
+            // Dereferencing an empty optional is undefined behavior; report the problem instead
+            QMessageBox::warning(this, tr("Error"),
+                                 tr("The surface address is not mapped to CPU memory."));
+            return;
+        }
 
-        const u8* buffer = Memory::GetPointer(*address);
+        const u8* const buffer = Memory::GetPointer(*address);
         ASSERT_MSG(buffer != nullptr, "Memory not accessible");
 
-        QFile file(filename);
-        file.open(QIODevice::WriteOnly);
-        int size = surface_width * surface_height * Tegra::Texture::BytesPerPixel(surface_format);
-        QByteArray data(reinterpret_cast<const char*>(buffer), size);
-        file.write(data);
+        QFile file{filename};
+        if (!file.open(QIODevice::WriteOnly)) {
+            QMessageBox::warning(this, tr("Error"), tr("Failed to open file '%1'").arg(filename));
+            return;
+        }
+
+        const int size =
+            surface_width * surface_height * Tegra::Texture::BytesPerPixel(surface_format);
+        const QByteArray data(reinterpret_cast<const char*>(buffer), size);
+        if (file.write(data) != data.size()) {
+            QMessageBox::warning(
+                this, tr("Error"),
+                tr("Failed to completely write surface data to file. The saved data will "
+                   "likely be corrupt."));
+        }
     } else {
         UNREACHABLE_MSG("Unhandled filter selected");
     }
diff --git a/src/yuzu/debugger/graphics/graphics_surface.h b/src/yuzu/debugger/graphics/graphics_surface.h
index 323e39d94..89445b18f 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.h
+++ b/src/yuzu/debugger/graphics/graphics_surface.h
@@ -87,7 +87,7 @@ private:
     QPushButton* save_surface;
 
     Source surface_source;
-    Tegra::GPUVAddr surface_address;
+    GPUVAddr surface_address;
     unsigned surface_width;
     unsigned surface_height;
     Tegra::Texture::TextureFormat surface_format;
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index 8b30e0a85..86e03e46d 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -7,6 +7,7 @@
 #include <QMouseEvent>
 #include <QPainter>
 #include <QString>
+#include <QTimer>
 #include "common/common_types.h"
 #include "common/microprofile.h"
 #include "yuzu/debugger/profiler.h"
diff --git a/src/yuzu/debugger/profiler.h b/src/yuzu/debugger/profiler.h
index eae1e9e3c..8e69fdb06 100644
--- a/src/yuzu/debugger/profiler.h
+++ b/src/yuzu/debugger/profiler.h
@@ -4,10 +4,11 @@
 
 #pragma once
 
-#include <QAbstractItemModel>
-#include <QDockWidget>
-#include <QTimer>
-#include "common/microprofile.h"
+#include <QWidget>
+
+class QAction;
+class QHideEvent;
+class QShowEvent;
 
 class MicroProfileDialog : public QWidget {
     Q_OBJECT
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 06ad74ffe..593bb681f 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -234,6 +234,9 @@ QString WaitTreeThread::GetText() const {
     case Kernel::ThreadStatus::WaitMutex:
         status = tr("waiting for mutex");
         break;
+    case Kernel::ThreadStatus::WaitCondVar:
+        status = tr("waiting for condition variable");
+        break;
     case Kernel::ThreadStatus::WaitArb:
         status = tr("waiting for address arbiter");
         break;
@@ -269,6 +272,7 @@ QColor WaitTreeThread::GetColor() const {
     case Kernel::ThreadStatus::WaitSynchAll:
     case Kernel::ThreadStatus::WaitSynchAny:
     case Kernel::ThreadStatus::WaitMutex:
+    case Kernel::ThreadStatus::WaitCondVar:
     case Kernel::ThreadStatus::WaitArb:
         return QColor(Qt::GlobalColor::red);
     case Kernel::ThreadStatus::Dormant:
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index c0e3c5fa9..4422a572b 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -329,6 +329,8 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
     QMenu context_menu;
     QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
     QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location"));
+    QAction* open_transferable_shader_cache =
+        context_menu.addAction(tr("Open Transferable Shader Cache"));
     context_menu.addSeparator();
     QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
     QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
@@ -344,6 +346,8 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
             [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); });
     connect(open_lfs_location, &QAction::triggered,
             [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); });
+    connect(open_transferable_shader_cache, &QAction::triggered,
+            [&]() { emit OpenTransferableShaderCacheRequested(program_id); });
     connect(dump_romfs, &QAction::triggered, [&]() { emit DumpRomFSRequested(program_id, path); });
     connect(copy_tid, &QAction::triggered, [&]() { emit CopyTIDRequested(program_id); });
     connect(navigate_to_gamedb_entry, &QAction::triggered,
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index b317eb2fc..8ea5cbaaa 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -66,6 +66,7 @@ signals:
     void GameChosen(QString game_path);
     void ShouldCancelWorker();
     void OpenFolderRequested(u64 program_id, GameListOpenTarget target);
+    void OpenTransferableShaderCacheRequested(u64 program_id);
     void DumpRomFSRequested(u64 program_id, const std::string& game_path);
     void CopyTIDRequested(u64 program_id);
     void NavigateToGamedbEntryRequested(u64 program_id,
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 86f6d0165..4e2d988cd 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -192,7 +192,12 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
     }
 
     // update labels and progress bar
-    ui->stage->setText(stage_translations[stage].arg(value).arg(total));
+    if (stage == VideoCore::LoadCallbackStage::Decompile ||
+        stage == VideoCore::LoadCallbackStage::Build) {
+        ui->stage->setText(stage_translations[stage].arg(value).arg(total));
+    } else {
+        ui->stage->setText(stage_translations[stage]);
+    }
     ui->value->setText(estimate);
     ui->progress_bar->setValue(static_cast<int>(value));
     previous_time = now;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 0bda2239f..77b6f7cc8 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -37,14 +37,20 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include <glad/glad.h>
 
 #define QT_NO_OPENGL
+#include <QClipboard>
+#include <QDesktopServices>
 #include <QDesktopWidget>
 #include <QDialogButtonBox>
 #include <QFile>
 #include <QFileDialog>
+#include <QInputDialog>
 #include <QMessageBox>
+#include <QProgressBar>
+#include <QProgressDialog>
+#include <QShortcut>
+#include <QStatusBar>
 #include <QtConcurrent/QtConcurrent>
-#include <QtGui>
-#include <QtWidgets>
+
 #include <fmt/format.h>
 #include "common/common_paths.h"
 #include "common/detached_tasks.h"
@@ -55,11 +61,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "common/microprofile.h"
 #include "common/scm_rev.h"
 #include "common/scope_exit.h"
-#include "common/string_util.h"
 #include "common/telemetry.h"
 #include "core/core.h"
 #include "core/crypto/key_manager.h"
-#include "core/file_sys/bis_factory.h"
 #include "core/file_sys/card_image.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
@@ -71,7 +75,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "core/frontend/applets/software_keyboard.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/service/filesystem/filesystem.h"
-#include "core/hle/service/filesystem/fsp_ldr.h"
 #include "core/hle/service/nfp/nfp.h"
 #include "core/hle/service/sm/sm.h"
 #include "core/loader/loader.h"
@@ -649,6 +652,8 @@ void GMainWindow::RestoreUIState() {
 void GMainWindow::ConnectWidgetEvents() {
     connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile);
     connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder);
+    connect(game_list, &GameList::OpenTransferableShaderCacheRequested, this,
+            &GMainWindow::OnTransferableShaderCacheOpenFile);
     connect(game_list, &GameList::DumpRomFSRequested, this, &GMainWindow::OnGameListDumpRomFS);
     connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID);
     connect(game_list, &GameList::NavigateToGamedbEntryRequested, this,
@@ -1082,6 +1087,39 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target
     QDesktopServices::openUrl(QUrl::fromLocalFile(qpath));
 }
 
+/// Handles GameList::OpenTransferableShaderCacheRequested by showing the title's transferable
+/// shader cache in the system file browser, or a warning box if no cache file exists.
+void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) {
+    ASSERT(program_id != 0);
+
+    const QString shader_dir =
+        QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir));
+    const QString cache_folder_path = shader_dir + "opengl" + DIR_SEP + "transferable";
+    const QString cache_file_name =
+        QString::fromStdString(fmt::format("{:016X}.bin", program_id));
+    const QString cache_file_path = cache_folder_path + DIR_SEP + cache_file_name;
+
+    // Bail out with a warning when there is no cache file for this title.
+    if (!QFile::exists(cache_file_path)) {
+        QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"),
+                             tr("A shader cache for this title does not exist."));
+        return;
+    }
+
+#ifdef Q_OS_WIN
+    // Explorer's "/select," switch opens the folder with the given entry highlighted.
+    QStringList explorer_args;
+    if (!QFileInfo(cache_file_path).isDir()) {
+        explorer_args << QStringLiteral("/select,");
+    }
+    explorer_args << QDir::toNativeSeparators(cache_file_path);
+    QProcess::startDetached(QStringLiteral("explorer"), explorer_args);
+#else
+    // Elsewhere, open the cache folder without preselecting this title's cache file.
+    QDesktopServices::openUrl(QUrl::fromLocalFile(cache_folder_path));
+#endif
+}
+
 static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) {
     std::size_t out = 0;
 
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 90cd79bca..ba406ae64 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -175,6 +175,7 @@ private slots:
     /// Called whenever a user selects a game in the game list widget.
     void OnGameListLoadFile(QString game_path);
     void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target);
+    void OnTransferableShaderCacheOpenFile(u64 program_id);
     void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
     void OnGameListCopyTID(u64 program_id);
     void OnGameListNavigateToGamedbEntry(u64 program_id,
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 32e78049c..f24cc77fe 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -319,7 +319,6 @@ void Config::ReadValues() {
 
     // System
     Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false);
-    Settings::values.enable_nfc = sdl2_config->GetBoolean("System", "enable_nfc", true);
     const auto size = sdl2_config->GetInteger("System", "users_size", 0);
 
     Settings::values.current_user = std::clamp<int>(
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index c6c66a787..245f25847 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -114,9 +114,9 @@ int main(int argc, char** argv) {
     };
 
     while (optind < argc) {
-        char arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index);
+        int arg = getopt_long(argc, argv, "g:fhvp::", long_options, &option_index);
         if (arg != -1) {
-            switch (arg) {
+            switch (static_cast<char>(arg)) {
             case 'g':
                 errno = 0;
                 gdb_port = strtoul(optarg, &endarg, 0);