250 files changed, 5055 insertions, 1723 deletions
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..ab861a396
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+dist/languages/* linguist-vendored
+dist/qt_themes/* linguist-vendored
+externals/* linguist-vendored
+*.h linguist-language=cpp
diff --git a/.travis.yml b/.travis.yml
index dee34a8e3..b0fbe3c5f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,11 +24,24 @@ matrix:
     - os: osx
       env: NAME="macos build"
       sudo: false
-      osx_image: xcode9.3
+      osx_image: xcode10
       install: "./.travis/macos/deps.sh"
       script: "./.travis/macos/build.sh"
       after_success: "./.travis/macos/upload.sh"
       cache: ccache
+    - os: linux
+      env: NAME="MinGW build"
+      sudo: required
+      dist: trusty
+      services: docker
+      addons:
+        apt:
+          packages:
+            - p7zip-full
+      install: "./.travis/linux-mingw/deps.sh"
+      script: "./.travis/linux-mingw/build.sh"
+      after_success: "./.travis/linux-mingw/upload.sh"
+      cache: ccache
 
 deploy:
   provider: releases
diff --git a/.travis/common/post-upload.sh b/.travis/common/post-upload.sh
index 90deaaec8..28735a9cf 100755
--- a/.travis/common/post-upload.sh
+++ b/.travis/common/post-upload.sh
@@ -11,6 +11,9 @@ if [ -z $TRAVIS_TAG ]; then
     RELEASE_NAME=head
 else
     RELEASE_NAME=$(echo $TRAVIS_TAG | cut -d- -f1)
+    if [ "$NAME" = "MinGW build" ]; then
+        RELEASE_NAME="${RELEASE_NAME}-mingw"
+    fi
 fi
 
 mv "$REV_NAME" $RELEASE_NAME
diff --git a/.travis/linux-mingw/build.sh b/.travis/linux-mingw/build.sh
new file mode 100755
index 000000000..be03cc0f3
--- /dev/null
+++ b/.travis/linux-mingw/build.sh
@@ -0,0 +1,7 @@
+#!/bin/bash -ex
+# Runs the MinGW cross build (docker.sh) inside an ubuntu:18.04 container, with the
+# repository mounted at /yuzu and the host ccache directory mounted at /root/.ccache.
+# -p: the cache directory may already exist (e.g. restored by the Travis ccache cache)
+mkdir -p "$HOME/.ccache"
+# "$(pwd)" is quoted so a checkout path containing spaces is not word-split
+docker run --env-file .travis/common/travis-ci.env -v "$(pwd)":/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
diff --git a/.travis/linux-mingw/deps.sh b/.travis/linux-mingw/deps.sh
new file mode 100755
index 000000000..540bb934a
--- /dev/null
+++ b/.travis/linux-mingw/deps.sh
@@ -0,0 +1,3 @@
+#!/bin/sh -ex
+# Pre-pull the base image that build.sh runs the MinGW cross build in.
+docker pull ubuntu:18.04
diff --git a/.travis/linux-mingw/docker.sh b/.travis/linux-mingw/docker.sh
new file mode 100755
index 000000000..d15c3f6e8
--- /dev/null
+++ b/.travis/linux-mingw/docker.sh
@@ -0,0 +1,59 @@
+#!/bin/bash -ex
+# Runs inside the ubuntu:18.04 container started by build.sh: cross-compiles yuzu with MinGW and stages the binaries and DLLs in package/
+cd /yuzu
+MINGW_PACKAGES="sdl2-mingw-w64 qt5base-mingw-w64 qt5tools-mingw-w64 libsamplerate-mingw-w64 qt5multimedia-mingw-w64"
+apt-get update
+apt-get install -y gpg wget git python3-pip python ccache g++-mingw-w64-x86-64 gcc-mingw-w64-x86-64 mingw-w64-tools cmake
+echo 'deb http://ppa.launchpad.net/tobydox/mingw-w64/ubuntu bionic main ' > /etc/apt/sources.list.d/extras.list
+apt-key adv --keyserver keyserver.ubuntu.com --recv '72931B477E22FEFD47F8DECE02FE5F12ADDE29B2'
+apt-get update
+apt-get install -y ${MINGW_PACKAGES}
+
+# fix a problem in current MinGW headers
+wget -q https://raw.githubusercontent.com/Alexpux/mingw-w64/d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-headers/crt/errno.h -O /usr/x86_64-w64-mingw32/include/errno.h
+# override Travis CI unreasonable ccache size
+echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf"
+
+# Dirty hack to trick unicorn makefile into believing we are in a MINGW system (the real uname is parked as /bin/uname1 and restored after the build)
+mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname
+chmod +x /bin/uname
+
+# Dirty hack to trick unicorn makefile into believing we have cmd
+echo '' >> /bin/cmd
+chmod +x /bin/cmd
+
+mkdir build && cd build
+cmake .. -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
+make -j4
+
+# Clean up the dirty hacks
+rm /bin/uname && mv /bin/uname1 /bin/uname
+rm /bin/cmd
+
+ccache -s
+# NOTE(review): ctest is disabled below, presumably because the Windows test binaries cannot run in this Linux container -- confirm
+echo "Tests skipped"
+#ctest -VV -C Release
+
+echo 'Prepare binaries...'
+cd ..
+mkdir package
+
+QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
+find build/ -name "yuzu*.exe" -exec cp {} 'package' \;
+
+# copy Qt plugins
+mkdir package/platforms
+cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/
+cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/
+cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/
+rm -f package/mediaservice/*d.dll
+
+for i in package/*.exe; do
+  # we need to process pdb here, however, cv2pdb
+  # does not work here, so we just simply strip all the debug symbols
+  x86_64-w64-mingw32-strip "${i}"
+done
+# scan_dll.py walks the import tables of the executables and copies the non-system DLLs it can locate into package/
+pip3 install pefile
+python3 .travis/linux-mingw/scan_dll.py package/*.exe "package/"
diff --git a/.travis/linux-mingw/scan_dll.py b/.travis/linux-mingw/scan_dll.py
new file mode 100644
index 000000000..163183f2e
--- /dev/null
+++ b/.travis/linux-mingw/scan_dll.py
@@ -0,0 +1,152 @@
+"""Collect the DLLs that MinGW-built Windows executables need at runtime.
+
+Usage: scan_dll.py [files to examine ...] [target deploy directory]
+
+The import tables of the given PE files are walked recursively and every
+non-system DLL that can be located under DLL_PATH is copied into the target
+directory.  DLLs that cannot be located are reported at the end.
+"""
+import pefile
+import sys
+import re
+import os
+import queue
+import shutil
+
+# constant definitions
+KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL',
+                  'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL']
+# dll w/ names like *32.dll are likely to be system dlls
+SYS_DLL_PATTERN = re.compile(r'.*32\.DLL')
+# below is for Ubuntu 18.04 with specified PPA enabled, if you are using
+# other distro or different repositories, change the following accordingly
+DLL_PATH = [
+    '/usr/x86_64-w64-mingw32/bin/',
+    '/usr/x86_64-w64-mingw32/lib/',
+    '/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/'
+]
+
+# names of the DLLs that could not be found under DLL_PATH while scanning
+missing = []
+
+
+def parse_imports(file_name):
+    """Return the names of the non-system DLLs imported by a PE file.
+
+    Names are returned as spelled in the import table.  Entries listed in
+    KNOWN_SYS_DLLS or matching SYS_DLL_PATTERN are filtered out.
+    """
+    pe = pefile.PE(file_name, fast_load=True)
+    pe.parse_data_directories()
+
+    results = []
+    # pefile only sets DIRECTORY_ENTRY_IMPORT when the file has an import
+    # table, so treat a missing attribute as "no imports" instead of crashing
+    for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
+        current = entry.dll.decode()
+        current_u = current.upper()  # b/c Windows is often case insensitive
+        # here we filter out system dlls
+        if current_u in KNOWN_SYS_DLLS or SYS_DLL_PATTERN.match(current_u):
+            continue
+        results.append(current)
+
+    return results
+
+
+def parse_imports_recursive(file_name, path_list=None):
+    """Resolve the transitive DLL dependencies of one or more PE files.
+
+    file_name -- a path, or a list of paths, to start scanning from
+    path_list -- optional list; the full path of every dependency found is
+                 appended to it
+
+    Returns the names of the dependencies that were located.  Names that
+    cannot be found under DLL_PATH are appended to the module-level
+    ``missing`` list (once each).
+    """
+    # a fresh list per call; a mutable default would be shared between calls
+    if path_list is None:
+        path_list = []
+    q = queue.Queue()  # create a FIFO queue
+    # file_name can be a string or a list for convenience
+    if isinstance(file_name, str):
+        q.put(file_name)
+    elif isinstance(file_name, list):
+        for i in file_name:
+            q.put(i)
+    full_list = []
+    # lower-cased names already handled (found or missing); DLL names are
+    # compared case-insensitively, matching find_dll
+    seen = set()
+    while not q.empty():
+        current = q.get_nowait()
+        print('> %s' % current)
+        for dep in parse_imports(current):
+            key = dep.lower()
+            # the dependency was already processed, skip
+            if key in seen:
+                continue
+            seen.add(key)
+            # find the requested dll in the provided paths
+            full_path = find_dll(dep)
+            if not full_path:
+                missing.append(dep)
+                continue
+            full_list.append(dep)
+            q.put(full_path)
+            path_list.append(full_path)
+    return full_list
+
+
+def find_dll(name):
+    """Return the full path of the DLL ``name`` under DLL_PATH, or None.
+
+    File names are compared case-insensitively.
+    """
+    wanted = name.lower()
+    for path in DLL_PATH:
+        for root, _, files in os.walk(path):
+            for f in files:
+                if f.lower() == wanted:
+                    return os.path.join(root, f)
+    return None
+
+
+def deploy(name, dst, dry_run=False):
+    """Copy every DLL needed by ``name`` into the directory ``dst``.
+
+    With dry_run=True the copies are only printed.  Returns the list of
+    source paths of the DLLs that were (or would be) copied.
+    """
+    dlls_path = []
+    parse_imports_recursive(name, dlls_path)
+    for dll_entry in dlls_path:
+        if dry_run:
+            print('[Dry-Run] Copy %s to %s' % (dll_entry, dst))
+        else:
+            shutil.copy(dll_entry, dst)
+    print('Deploy completed.')
+    return dlls_path
+
+
+def main():
+    """Command-line entry point; returns the process exit status."""
+    if len(sys.argv) < 3:
+        print('Usage: %s [files to examine ...] [target deploy directory]'
+              % sys.argv[0])
+        return 1
+    to_deploy = sys.argv[1:-1]
+    tgt_dir = sys.argv[-1]
+    if not os.path.isdir(tgt_dir):
+        print('%s is not a directory.' % tgt_dir)
+        return 1
+    print('Scanning dependencies...')
+    deploy(to_deploy, tgt_dir)
+    if missing:
+        print('Following DLLs are not found: %s' % ('\n'.join(missing)))
+    return 0
+
+
+if __name__ == '__main__':
+    # propagate main()'s status so CI notices usage/target-directory errors
+    sys.exit(main())
diff --git a/.travis/linux-mingw/upload.sh b/.travis/linux-mingw/upload.sh
new file mode 100755
index 000000000..66e896bc4
--- /dev/null
+++ b/.travis/linux-mingw/upload.sh
@@ -0,0 +1,13 @@
+#!/bin/bash -ex
+# Stages the contents of package/ in a revision-named directory, then hands off to the common post-upload script.
+. .travis/common/pre-upload.sh
+# presumably GITDATE and GITREV are set by pre-upload.sh (not visible here) -- confirm
+REV_NAME="yuzu-windows-mingw-${GITDATE}-${GITREV}"
+ARCHIVE_NAME="${REV_NAME}.tar.gz"
+COMPRESSION_FLAGS="-czvf"
+# ARCHIVE_NAME and COMPRESSION_FLAGS are not used in this script; presumably post-upload.sh reads them -- confirm
+mkdir "$REV_NAME"
+# get around the permission issues
+cp -r package/* "$REV_NAME"
+
+. .travis/common/post-upload.sh
diff --git a/.travis/linux/docker.sh b/.travis/linux/docker.sh
index 459d6bc75..892d2480a 100755
--- a/.travis/linux/docker.sh
+++ b/.travis/linux/docker.sh
@@ -6,7 +6,9 @@ apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev
 cd /yuzu
 
 mkdir build && cd build
-cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -G Ninja
+cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -G Ninja
 ninja
 
+ccache -s
+
 ctest -VV -C Release
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index b76a153be..e68dc1400 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,14 +2,16 @@
 
 set -o pipefail
 
-export MACOSX_DEPLOYMENT_TARGET=10.12
+export MACOSX_DEPLOYMENT_TARGET=10.13
 export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn
 export PATH="/usr/local/opt/ccache/libexec:$PATH"
 
 mkdir build && cd build
 cmake --version
-cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON
+cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON
 make -j4
 
+ccache -s
+
 ctest -VV -C Release
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 500d099fc..cd990188e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -123,8 +123,6 @@ else()
     # Avoid windows.h from including some usually unused libs like winsocks.h, since this might cause some redefinition errors.
     add_definitions(/DWIN32_LEAN_AND_MEAN)
 
-    # set up output paths for executable binaries (.exe-files, and .dll-files on DLL-capable platforms)
-    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
     set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE)
 
     # Tweak optimization settings
@@ -269,10 +267,18 @@ if (YUZU_USE_BUNDLED_UNICORN)
 
         find_package(PythonInterp 2.7 REQUIRED)
 
-        add_custom_command(OUTPUT ${LIBUNICORN_LIBRARY}
-            COMMAND ${CMAKE_COMMAND} -E env UNICORN_ARCHS="aarch64" PYTHON="${PYTHON_EXECUTABLE}" /bin/sh make.sh macos-universal-no
-            WORKING_DIRECTORY ${UNICORN_PREFIX}
-        )
+        if (MINGW)
+            add_custom_command(OUTPUT ${LIBUNICORN_LIBRARY}
+                COMMAND ${CMAKE_COMMAND} -E env UNICORN_ARCHS="aarch64" PYTHON="${PYTHON_EXECUTABLE}" /bin/sh make.sh cross-win64
+                WORKING_DIRECTORY ${UNICORN_PREFIX}
+            )
+        else()
+            add_custom_command(OUTPUT ${LIBUNICORN_LIBRARY}
+                COMMAND ${CMAKE_COMMAND} -E env UNICORN_ARCHS="aarch64" PYTHON="${PYTHON_EXECUTABLE}" /bin/sh make.sh macos-universal-no
+                WORKING_DIRECTORY ${UNICORN_PREFIX}
+            )
+        endif()
+
         # ALL makes this custom target build every time
         # but it won't actually build if LIBUNICORN_LIBRARY is up to date
         add_custom_target(unicorn-build ALL
@@ -286,6 +292,7 @@ endif()
 
 if (UNICORN_FOUND)
     add_library(unicorn INTERFACE)
+    add_dependencies(unicorn unicorn-build)
     target_link_libraries(unicorn INTERFACE "${LIBUNICORN_LIBRARY}")
     target_include_directories(unicorn INTERFACE "${LIBUNICORN_INCLUDE_DIR}")
 else()
@@ -431,8 +438,12 @@ enable_testing()
 add_subdirectory(externals)
 add_subdirectory(src)
 
-# Set yuzu project as default StartUp Project in Visual Studio
-set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT yuzu)
+# Set yuzu project or yuzu-cmd project as default StartUp Project in Visual Studio depending on whether QT is enabled or not
+if(ENABLE_QT)
+    set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT yuzu)
+else()
+    set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT yuzu-cmd)
+endif()
 
 
 # Installation instructions
diff --git a/CMakeModules/MinGWCross.cmake b/CMakeModules/MinGWCross.cmake
new file mode 100644
index 000000000..29ecd1ac4
--- /dev/null
+++ b/CMakeModules/MinGWCross.cmake
@@ -0,0 +1,54 @@
+set(MINGW_PREFIX   /usr/x86_64-w64-mingw32/)
+set(CMAKE_SYSTEM_NAME               Windows)
+set(CMAKE_SYSTEM_PROCESSOR           x86_64)
+# Hack: claim a Windows host; without this the build hits some strange errors
+set(CMAKE_HOST_WIN32                 TRUE)
+
+# Point the find root and SDL2 at the MinGW prefix; the tool prefix expands to x86_64-w64-mingw32-
+set(CMAKE_FIND_ROOT_PATH            ${MINGW_PREFIX})
+set(SDL2_PATH                       ${MINGW_PREFIX})
+set(MINGW_TOOL_PREFIX               ${CMAKE_SYSTEM_PROCESSOR}-w64-mingw32-)
+
+# Specify the cross compiler
+set(CMAKE_C_COMPILER            ${MINGW_TOOL_PREFIX}gcc-posix)
+set(CMAKE_CXX_COMPILER          ${MINGW_TOOL_PREFIX}g++-posix)
+set(CMAKE_RC_COMPILER           ${MINGW_TOOL_PREFIX}windres)
+
+# Mingw tools
+set(STRIP                       ${MINGW_TOOL_PREFIX}strip)
+set(WINDRES                     ${MINGW_TOOL_PREFIX}windres)
+set(ENV{PKG_CONFIG}             ${MINGW_TOOL_PREFIX}pkg-config)
+
+# ccache wrapper: opt in with -DUSE_CCACHE=ON; compile and link rules are then launched through ccache
+option(USE_CCACHE "Use ccache for compilation" OFF)
+if(USE_CCACHE)
+    find_program(CCACHE ccache)
+    if(CCACHE)
+        message(STATUS "Using ccache found in PATH")
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE})
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE})
+    else(CCACHE)
+        message(WARNING "USE_CCACHE enabled, but no ccache found")
+    endif(CCACHE)
+endif(USE_CCACHE)
+
+# Search for programs in the build host directories (never under CMAKE_FIND_ROOT_PATH)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+
+# Echo modified cmake vars to screen for debugging purposes; skipped once ENV{MINGW_DEBUG_INFO} is set
+if(NOT DEFINED ENV{MINGW_DEBUG_INFO})
+        message("")
+        message("Custom cmake vars: (blank = system default)")
+        message("-----------------------------------------")
+        message("* CMAKE_C_COMPILER                     : ${CMAKE_C_COMPILER}")
+        message("* CMAKE_CXX_COMPILER                   : ${CMAKE_CXX_COMPILER}")
+        message("* CMAKE_RC_COMPILER                    : ${CMAKE_RC_COMPILER}")
+        message("* WINDRES                              : ${WINDRES}")
+        message("* ENV{PKG_CONFIG}                      : $ENV{PKG_CONFIG}")
+        message("* STRIP                                : ${STRIP}")
+        message("* USE_CCACHE                           : ${USE_CCACHE}")
+        message("")
+        # So that the debug info only appears once
+        set(ENV{MINGW_DEBUG_INFO} SHOWN)
+endif()
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject 959446573f3adfcba173ef4b0011a4a280f18eb
+Subproject 171d11659d760a4d4674d3a90698fe31ea407e2
diff --git a/src/audio_core/algorithm/filter.cpp b/src/audio_core/algorithm/filter.cpp
index 9fcd0614d..f65bf64f7 100644
--- a/src/audio_core/algorithm/filter.cpp
+++ b/src/audio_core/algorithm/filter.cpp
@@ -35,12 +35,12 @@ Filter::Filter(double a0, double a1, double a2, double b0, double b1, double b2)
     : a1(a1 / a0), a2(a2 / a0), b0(b0 / a0), b1(b1 / a0), b2(b2 / a0) {}
 
 void Filter::Process(std::vector<s16>& signal) {
-    const size_t num_frames = signal.size() / 2;
-    for (size_t i = 0; i < num_frames; i++) {
+    const std::size_t num_frames = signal.size() / 2;
+    for (std::size_t i = 0; i < num_frames; i++) {
         std::rotate(in.begin(), in.end() - 1, in.end());
         std::rotate(out.begin(), out.end() - 1, out.end());
 
-        for (size_t ch = 0; ch < channel_count; ch++) {
+        for (std::size_t ch = 0; ch < channel_count; ch++) {
             in[0][ch] = signal[i * channel_count + ch];
 
             out[0][ch] = b0 * in[0][ch] + b1 * in[1][ch] + b2 * in[2][ch] - a1 * out[1][ch] -
@@ -54,14 +54,14 @@ void Filter::Process(std::vector<s16>& signal) {
 /// Calculates the appropriate Q for each biquad in a cascading filter.
 /// @param total_count The total number of biquads to be cascaded.
 /// @param index 0-index of the biquad to calculate the Q value for.
-static double CascadingBiquadQ(size_t total_count, size_t index) {
+static double CascadingBiquadQ(std::size_t total_count, std::size_t index) {
     const double pole = M_PI * (2 * index + 1) / (4.0 * total_count);
     return 1.0 / (2.0 * std::cos(pole));
 }
 
-CascadingFilter CascadingFilter::LowPass(double cutoff, size_t cascade_size) {
+CascadingFilter CascadingFilter::LowPass(double cutoff, std::size_t cascade_size) {
     std::vector<Filter> cascade(cascade_size);
-    for (size_t i = 0; i < cascade_size; i++) {
+    for (std::size_t i = 0; i < cascade_size; i++) {
         cascade[i] = Filter::LowPass(cutoff, CascadingBiquadQ(cascade_size, i));
     }
     return CascadingFilter{std::move(cascade)};
diff --git a/src/audio_core/algorithm/filter.h b/src/audio_core/algorithm/filter.h
index a41beef98..3546d149b 100644
--- a/src/audio_core/algorithm/filter.h
+++ b/src/audio_core/algorithm/filter.h
@@ -30,7 +30,7 @@ public:
     void Process(std::vector<s16>& signal);
 
 private:
-    static constexpr size_t channel_count = 2;
+    static constexpr std::size_t channel_count = 2;
 
     /// Coefficients are in normalized form (a0 = 1.0).
     double a1, a2, b0, b1, b2;
@@ -46,7 +46,7 @@ public:
     /// Creates a cascading low-pass filter.
     /// @param cutoff Determines the cutoff frequency. A value from 0.0 to 1.0.
     /// @param cascade_size Number of biquads in cascade.
-    static CascadingFilter LowPass(double cutoff, size_t cascade_size);
+    static CascadingFilter LowPass(double cutoff, std::size_t cascade_size);
 
     /// Passthrough.
     CascadingFilter();
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp
index 11459821f..3aea9b0f2 100644
--- a/src/audio_core/algorithm/interpolate.cpp
+++ b/src/audio_core/algorithm/interpolate.cpp
@@ -14,7 +14,7 @@
 namespace AudioCore {
 
 /// The Lanczos kernel
-static double Lanczos(size_t a, double x) {
+static double Lanczos(std::size_t a, double x) {
     if (x == 0.0)
         return 1.0;
     const double px = M_PI * x;
@@ -37,15 +37,15 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,
     }
     state.nyquist.Process(input);
 
-    constexpr size_t taps = InterpolationState::lanczos_taps;
-    const size_t num_frames = input.size() / 2;
+    constexpr std::size_t taps = InterpolationState::lanczos_taps;
+    const std::size_t num_frames = input.size() / 2;
 
     std::vector<s16> output;
-    output.reserve(static_cast<size_t>(input.size() / ratio + 4));
+    output.reserve(static_cast<std::size_t>(input.size() / ratio + 4));
 
     double& pos = state.position;
     auto& h = state.history;
-    for (size_t i = 0; i < num_frames; ++i) {
+    for (std::size_t i = 0; i < num_frames; ++i) {
         std::rotate(h.begin(), h.end() - 1, h.end());
         h[0][0] = input[i * 2 + 0];
         h[0][1] = input[i * 2 + 1];
@@ -53,7 +53,7 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,
         while (pos <= 1.0) {
             double l = 0.0;
             double r = 0.0;
-            for (size_t j = 0; j < h.size(); j++) {
+            for (std::size_t j = 0; j < h.size(); j++) {
                 l += Lanczos(taps, pos + j - taps + 1) * h[j][0];
                 r += Lanczos(taps, pos + j - taps + 1) * h[j][1];
             }
diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h
index c79c2eef4..edbd6460f 100644
--- a/src/audio_core/algorithm/interpolate.h
+++ b/src/audio_core/algorithm/interpolate.h
@@ -12,8 +12,8 @@
 namespace AudioCore {
 
 struct InterpolationState {
-    static constexpr size_t lanczos_taps = 4;
-    static constexpr size_t history_size = lanczos_taps * 2 - 1;
+    static constexpr std::size_t lanczos_taps = 4;
+    static constexpr std::size_t history_size = lanczos_taps * 2 - 1;
 
     double current_ratio = 0.0;
     CascadingFilter nyquist;
diff --git a/src/audio_core/audio_out.cpp b/src/audio_core/audio_out.cpp
index 12632a95c..0c8f5b18e 100644
--- a/src/audio_core/audio_out.cpp
+++ b/src/audio_core/audio_out.cpp
@@ -39,7 +39,8 @@ StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&&
         sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
 }
 
-std::vector<Buffer::Tag> AudioOut::GetTagsAndReleaseBuffers(StreamPtr stream, size_t max_count) {
+std::vector<Buffer::Tag> AudioOut::GetTagsAndReleaseBuffers(StreamPtr stream,
+                                                            std::size_t max_count) {
     return stream->GetTagsAndReleaseBuffers(max_count);
 }
 
diff --git a/src/audio_core/audio_out.h b/src/audio_core/audio_out.h
index 39b7e656b..df9607ac7 100644
--- a/src/audio_core/audio_out.h
+++ b/src/audio_core/audio_out.h
@@ -25,7 +25,7 @@ public:
                          Stream::ReleaseCallback&& release_callback);
 
     /// Returns a vector of recently released buffers specified by tag for the specified stream
-    std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, size_t max_count);
+    std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);
 
     /// Starts an audio stream for playback
     void StartStream(StreamPtr stream);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index a75cd3be5..6f0ff953a 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -3,9 +3,12 @@
 // Refer to the license.txt file included.
 
 #include "audio_core/algorithm/interpolate.h"
+#include "audio_core/audio_out.h"
 #include "audio_core/audio_renderer.h"
+#include "audio_core/codec.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/hle/kernel/event.h"
 #include "core/memory.h"
 
 namespace AudioCore {
@@ -13,6 +16,41 @@ namespace AudioCore {
 constexpr u32 STREAM_SAMPLE_RATE{48000};
 constexpr u32 STREAM_NUM_CHANNELS{2};
 
+class AudioRenderer::VoiceState {
+public:
+    bool IsPlaying() const {
+        return is_in_use && info.play_state == PlayState::Started;
+    }
+
+    const VoiceOutStatus& GetOutStatus() const {
+        return out_status;
+    }
+
+    const VoiceInfo& GetInfo() const {
+        return info;
+    }
+    /// Mutable access; UpdateAudioRenderer memcpy's the input VoiceInfo structs through this
+    VoiceInfo& Info() {
+        return info;
+    }
+    /// SetWaveIndex keeps only index & 3 and marks a buffer refresh as pending
+    void SetWaveIndex(std::size_t index);
+    std::vector<s16> DequeueSamples(std::size_t sample_count);
+    void UpdateState();
+    void RefreshBuffer();
+
+private:
+    bool is_in_use{};
+    bool is_refresh_pending{};
+    std::size_t wave_index{};
+    std::size_t offset{};
+    Codec::ADPCMState adpcm_state{};
+    InterpolationState interp_state{};
+    std::vector<s16> samples;
+    VoiceOutStatus out_status{};
+    VoiceInfo info{};
+};
+
 AudioRenderer::AudioRenderer(AudioRendererParameter params,
                              Kernel::SharedPtr<Kernel::Event> buffer_event)
     : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count) {
@@ -27,6 +65,8 @@ AudioRenderer::AudioRenderer(AudioRendererParameter params,
     QueueMixedBuffer(2);
 }
 
+AudioRenderer::~AudioRenderer() = default; // defined here, where VoiceState is a complete type
+
 u32 AudioRenderer::GetSampleRate() const {
     return worker_params.sample_rate;
 }
@@ -39,6 +79,10 @@ u32 AudioRenderer::GetMixBufferCount() const {
     return worker_params.mix_buffer_count;
 }
 
+Stream::State AudioRenderer::GetStreamState() const {
+    return stream->GetState(); // forwards the state reported by the stream member
+}
+
 std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) {
     // Copy UpdateDataHeader struct
     UpdateDataHeader config{};
@@ -52,8 +96,8 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
                 memory_pool_count * sizeof(MemoryPoolInfo));
 
     // Copy VoiceInfo structs
-    size_t offset{sizeof(UpdateDataHeader) + config.behavior_size + config.memory_pools_size +
-                  config.voice_resource_size};
+    std::size_t offset{sizeof(UpdateDataHeader) + config.behavior_size + config.memory_pools_size +
+                       config.voice_resource_size};
     for (auto& voice : voices) {
         std::memcpy(&voice.Info(), input_params.data() + offset, sizeof(VoiceInfo));
         offset += sizeof(VoiceInfo);
@@ -72,7 +116,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
 
     // Update memory pool state
     std::vector<MemoryPoolEntry> memory_pool(memory_pool_count);
-    for (size_t index = 0; index < memory_pool.size(); ++index) {
+    for (std::size_t index = 0; index < memory_pool.size(); ++index) {
         if (mem_pool_info[index].pool_state == MemoryPoolStates::RequestAttach) {
             memory_pool[index].state = MemoryPoolStates::Attached;
         } else if (mem_pool_info[index].pool_state == MemoryPoolStates::RequestDetach) {
@@ -93,7 +137,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
                 response_data.memory_pools_size);
 
     // Copy output voice status
-    size_t voice_out_status_offset{sizeof(UpdateDataHeader) + response_data.memory_pools_size};
+    std::size_t voice_out_status_offset{sizeof(UpdateDataHeader) + response_data.memory_pools_size};
     for (const auto& voice : voices) {
         std::memcpy(output_params.data() + voice_out_status_offset, &voice.GetOutStatus(),
                     sizeof(VoiceOutStatus));
@@ -103,12 +147,12 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
     return output_params;
 }
 
-void AudioRenderer::VoiceState::SetWaveIndex(size_t index) {
+void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) {
     wave_index = index & 3;
     is_refresh_pending = true;
 }
 
-std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(size_t sample_count) {
+std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count) {
     if (!IsPlaying()) {
         return {};
     }
@@ -117,9 +161,9 @@ std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(size_t sample_count)
         RefreshBuffer();
     }
 
-    const size_t max_size{samples.size() - offset};
-    const size_t dequeue_offset{offset};
-    size_t size{sample_count * STREAM_NUM_CHANNELS};
+    const std::size_t max_size{samples.size() - offset};
+    const std::size_t dequeue_offset{offset};
+    std::size_t size{sample_count * STREAM_NUM_CHANNELS};
     if (size > max_size) {
         size = max_size;
     }
@@ -184,7 +228,7 @@ void AudioRenderer::VoiceState::RefreshBuffer() {
     case 1:
         // 1 channel is upsampled to 2 channel
         samples.resize(new_samples.size() * 2);
-        for (size_t index = 0; index < new_samples.size(); ++index) {
+        for (std::size_t index = 0; index < new_samples.size(); ++index) {
             samples[index * 2] = new_samples[index];
             samples[index * 2 + 1] = new_samples[index];
         }
@@ -210,7 +254,7 @@ static constexpr s16 ClampToS16(s32 value) {
 }
 
 void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
-    constexpr size_t BUFFER_SIZE{512};
+    constexpr std::size_t BUFFER_SIZE{512};
     std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels());
 
     for (auto& voice : voices) {
@@ -218,7 +262,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
             continue;
         }
 
-        size_t offset{};
+        std::size_t offset{};
         s64 samples_remaining{BUFFER_SIZE};
         while (samples_remaining > 0) {
             const std::vector<s16> samples{voice.DequeueSamples(samples_remaining)};
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 6d069d693..dfef89e1d 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -8,16 +8,20 @@
 #include <memory>
 #include <vector>
 
-#include "audio_core/algorithm/interpolate.h"
-#include "audio_core/audio_out.h"
-#include "audio_core/codec.h"
 #include "audio_core/stream.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
-#include "core/hle/kernel/event.h"
+#include "core/hle/kernel/object.h"
+
+namespace Kernel {
+class Event;
+}
 
 namespace AudioCore {
 
+class AudioOut;
+
 enum class PlayState : u8 {
     Started = 0,
     Stopped = 1,
@@ -158,53 +162,23 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
 class AudioRenderer {
 public:
     AudioRenderer(AudioRendererParameter params, Kernel::SharedPtr<Kernel::Event> buffer_event);
+    ~AudioRenderer();
+
     std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
     void QueueMixedBuffer(Buffer::Tag tag);
     void ReleaseAndQueueBuffers();
     u32 GetSampleRate() const;
     u32 GetSampleCount() const;
     u32 GetMixBufferCount() const;
+    Stream::State GetStreamState() const;
 
 private:
-    class VoiceState {
-    public:
-        bool IsPlaying() const {
-            return is_in_use && info.play_state == PlayState::Started;
-        }
-
-        const VoiceOutStatus& GetOutStatus() const {
-            return out_status;
-        }
-
-        const VoiceInfo& GetInfo() const {
-            return info;
-        }
-
-        VoiceInfo& Info() {
-            return info;
-        }
-
-        void SetWaveIndex(size_t index);
-        std::vector<s16> DequeueSamples(size_t sample_count);
-        void UpdateState();
-        void RefreshBuffer();
-
-    private:
-        bool is_in_use{};
-        bool is_refresh_pending{};
-        size_t wave_index{};
-        size_t offset{};
-        Codec::ADPCMState adpcm_state{};
-        InterpolationState interp_state{};
-        std::vector<s16> samples;
-        VoiceOutStatus out_status{};
-        VoiceInfo info{};
-    };
+    class VoiceState;
 
     AudioRendererParameter worker_params;
     Kernel::SharedPtr<Kernel::Event> buffer_event;
     std::vector<VoiceState> voices;
-    std::unique_ptr<AudioCore::AudioOut> audio_out;
+    std::unique_ptr<AudioOut> audio_out;
     AudioCore::StreamPtr stream;
 };
 
diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp
index c3021403f..454de798b 100644
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -8,27 +8,27 @@
 
 namespace AudioCore::Codec {
 
-std::vector<s16> DecodeADPCM(const u8* const data, size_t size, const ADPCM_Coeff& coeff,
+std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM_Coeff& coeff,
                              ADPCMState& state) {
     // GC-ADPCM with scale factor and variable coefficients.
     // Frames are 8 bytes long containing 14 samples each.
     // Samples are 4 bits (one nibble) long.
 
-    constexpr size_t FRAME_LEN = 8;
-    constexpr size_t SAMPLES_PER_FRAME = 14;
+    constexpr std::size_t FRAME_LEN = 8;
+    constexpr std::size_t SAMPLES_PER_FRAME = 14;
     constexpr std::array<int, 16> SIGNED_NIBBLES = {
         {0, 1, 2, 3, 4, 5, 6, 7, -8, -7, -6, -5, -4, -3, -2, -1}};
 
-    const size_t sample_count = (size / FRAME_LEN) * SAMPLES_PER_FRAME;
-    const size_t ret_size =
+    const std::size_t sample_count = (size / FRAME_LEN) * SAMPLES_PER_FRAME;
+    const std::size_t ret_size =
         sample_count % 2 == 0 ? sample_count : sample_count + 1; // Ensure multiple of two.
     std::vector<s16> ret(ret_size);
 
     int yn1 = state.yn1, yn2 = state.yn2;
 
-    const size_t NUM_FRAMES =
+    const std::size_t NUM_FRAMES =
         (sample_count + (SAMPLES_PER_FRAME - 1)) / SAMPLES_PER_FRAME; // Round up.
-    for (size_t framei = 0; framei < NUM_FRAMES; framei++) {
+    for (std::size_t framei = 0; framei < NUM_FRAMES; framei++) {
         const int frame_header = data[framei * FRAME_LEN];
         const int scale = 1 << (frame_header & 0xF);
         const int idx = (frame_header >> 4) & 0x7;
@@ -53,9 +53,9 @@ std::vector<s16> DecodeADPCM(const u8* const data, size_t size, const ADPCM_Coef
             return static_cast<s16>(val);
         };
 
-        size_t outputi = framei * SAMPLES_PER_FRAME;
-        size_t datai = framei * FRAME_LEN + 1;
-        for (size_t i = 0; i < SAMPLES_PER_FRAME && outputi < sample_count; i += 2) {
+        std::size_t outputi = framei * SAMPLES_PER_FRAME;
+        std::size_t datai = framei * FRAME_LEN + 1;
+        for (std::size_t i = 0; i < SAMPLES_PER_FRAME && outputi < sample_count; i += 2) {
             const s16 sample1 = decode_sample(SIGNED_NIBBLES[data[datai] >> 4]);
             ret[outputi] = sample1;
             outputi++;
diff --git a/src/audio_core/codec.h b/src/audio_core/codec.h
index 3f845c42c..ef2ce01a8 100644
--- a/src/audio_core/codec.h
+++ b/src/audio_core/codec.h
@@ -38,7 +38,7 @@ using ADPCM_Coeff = std::array<s16, 16>;
  * @param state ADPCM state, this is updated with new state
  * @return Decoded stereo signed PCM16 data, sample_count in length
  */
-std::vector<s16> DecodeADPCM(const u8* const data, size_t size, const ADPCM_Coeff& coeff,
+std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM_Coeff& coeff,
                              ADPCMState& state);
 
 }; // namespace AudioCore::Codec
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 79155a7a0..392039688 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -63,8 +63,8 @@ public:
             // Downsample 6 channels to 2
             std::vector<s16> buf;
             buf.reserve(samples.size() * num_channels / source_num_channels);
-            for (size_t i = 0; i < samples.size(); i += source_num_channels) {
-                for (size_t ch = 0; ch < num_channels; ch++) {
+            for (std::size_t i = 0; i < samples.size(); i += source_num_channels) {
+                for (std::size_t ch = 0; ch < num_channels; ch++) {
                     buf.push_back(samples[i + ch]);
                 }
             }
@@ -75,7 +75,7 @@ public:
         queue.Push(samples);
     }
 
-    size_t SamplesInQueue(u32 num_channels) const override {
+    std::size_t SamplesInQueue(u32 num_channels) const override {
         if (!ctx)
             return 0;
 
@@ -119,10 +119,10 @@ CubebSink::CubebSink(std::string target_device_name) {
             LOG_WARNING(Audio_Sink, "Audio output device enumeration not supported");
         } else {
             const auto collection_end{collection.device + collection.count};
-            const auto device{std::find_if(collection.device, collection_end,
-                                           [&](const cubeb_device_info& device) {
-                                               return target_device_name == device.friendly_name;
-                                           })};
+            const auto device{
+                std::find_if(collection.device, collection_end, [&](const cubeb_device_info& info) {
+                    return target_device_name == info.friendly_name;
+                })};
             if (device != collection_end) {
                 output_device = device->devid;
             }
@@ -159,15 +159,16 @@ long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const
         return {};
     }
 
-    const size_t num_channels = impl->GetNumChannels();
-    const size_t samples_to_write = num_channels * num_frames;
-    size_t samples_written;
+    const std::size_t num_channels = impl->GetNumChannels();
+    const std::size_t samples_to_write = num_channels * num_frames;
+    std::size_t samples_written;
 
     if (Settings::values.enable_audio_stretching) {
         const std::vector<s16> in{impl->queue.Pop()};
-        const size_t num_in{in.size() / num_channels};
+        const std::size_t num_in{in.size() / num_channels};
         s16* const out{reinterpret_cast<s16*>(buffer)};
-        const size_t out_frames = impl->time_stretch.Process(in.data(), num_in, out, num_frames);
+        const std::size_t out_frames =
+            impl->time_stretch.Process(in.data(), num_in, out, num_frames);
         samples_written = out_frames * num_channels;
 
         if (impl->should_flush) {
@@ -184,7 +185,7 @@ long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const
     }
 
     // Fill the rest of the frames with last_frame
-    for (size_t i = samples_written; i < samples_to_write; i += num_channels) {
+    for (std::size_t i = samples_written; i < samples_to_write; i += num_channels) {
         std::memcpy(buffer + i * sizeof(s16), &impl->last_frame[0], num_channels * sizeof(s16));
     }
 
@@ -197,7 +198,7 @@ std::vector<std::string> ListCubebSinkDevices() {
     std::vector<std::string> device_list;
     cubeb* ctx;
 
-    if (cubeb_init(&ctx, "Citra Device Enumerator", nullptr) != CUBEB_OK) {
+    if (cubeb_init(&ctx, "yuzu Device Enumerator", nullptr) != CUBEB_OK) {
         LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
         return {};
     }
@@ -206,7 +207,7 @@ std::vector<std::string> ListCubebSinkDevices() {
     if (cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_OUTPUT, &collection) != CUBEB_OK) {
         LOG_WARNING(Audio_Sink, "Audio output device enumeration not supported");
     } else {
-        for (size_t i = 0; i < collection.count; i++) {
+        for (std::size_t i = 0; i < collection.count; i++) {
             const cubeb_device_info& device = collection.device[i];
             if (device.friendly_name) {
                 device_list.emplace_back(device.friendly_name);
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
index 2ed0c83b6..a78d78893 100644
--- a/src/audio_core/null_sink.h
+++ b/src/audio_core/null_sink.h
@@ -22,7 +22,7 @@ private:
     struct NullSinkStreamImpl final : SinkStream {
         void EnqueueSamples(u32 /*num_channels*/, const std::vector<s16>& /*samples*/) override {}
 
-        size_t SamplesInQueue(u32 /*num_channels*/) const override {
+        std::size_t SamplesInQueue(u32 /*num_channels*/) const override {
             return 0;
         }
 
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 84dcdd98d..742a5e0a0 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -7,6 +7,7 @@
 
 #include "audio_core/sink.h"
 #include "audio_core/sink_details.h"
+#include "audio_core/sink_stream.h"
 #include "audio_core/stream.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -17,7 +18,7 @@
 
 namespace AudioCore {
 
-constexpr size_t MaxAudioBufferCount{32};
+constexpr std::size_t MaxAudioBufferCount{32};
 
 u32 Stream::GetNumChannels() const {
     switch (format) {
@@ -48,11 +49,16 @@ void Stream::Play() {
 }
 
 void Stream::Stop() {
+    state = State::Stopped;
     ASSERT_MSG(false, "Unimplemented");
 }
 
+Stream::State Stream::GetState() const {
+    return state;
+}
+
 s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
-    const size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
+    const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
     return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
 }
 
@@ -122,9 +128,9 @@ bool Stream::ContainsBuffer(Buffer::Tag tag) const {
     return {};
 }
 
-std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(size_t max_count) {
+std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(std::size_t max_count) {
     std::vector<Buffer::Tag> tags;
-    for (size_t count = 0; count < max_count && !released_buffers.empty(); ++count) {
+    for (std::size_t count = 0; count < max_count && !released_buffers.empty(); ++count) {
         tags.push_back(released_buffers.front()->GetTag());
         released_buffers.pop();
     }
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index 049b92ca9..aebfeb51d 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -11,13 +11,16 @@
 #include <queue>
 
 #include "audio_core/buffer.h"
-#include "audio_core/sink_stream.h"
-#include "common/assert.h"
 #include "common/common_types.h"
-#include "core/core_timing.h"
+
+namespace CoreTiming {
+struct EventType;
+}
 
 namespace AudioCore {
 
+class SinkStream;
+
 /**
  * Represents an audio stream, which is a sequence of queued buffers, to be outputed by AudioOut
  */
@@ -30,6 +33,12 @@ public:
         Multi51Channel16,
     };
 
+    /// Current state of the stream
+    enum class State {
+        Stopped,
+        Playing,
+    };
+
     /// Callback function type, used to change guest state on a buffer being released
     using ReleaseCallback = std::function<void()>;
 
@@ -49,7 +58,7 @@ public:
     bool ContainsBuffer(Buffer::Tag tag) const;
 
     /// Returns a vector of recently released buffers specified by tag
-    std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(size_t max_count);
+    std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(std::size_t max_count);
 
     /// Returns true if the stream is currently playing
     bool IsPlaying() const {
@@ -57,7 +66,7 @@ public:
     }
 
     /// Returns the number of queued buffers
-    size_t GetQueueSize() const {
+    std::size_t GetQueueSize() const {
         return queued_buffers.size();
     }
 
@@ -69,13 +78,10 @@ public:
     /// Gets the number of channels
     u32 GetNumChannels() const;
 
-private:
-    /// Current state of the stream
-    enum class State {
-        Stopped,
-        Playing,
-    };
+    /// Get the state
+    State GetState() const;
 
+private:
     /// Plays the next queued buffer in the audio stream, starting playback if necessary
     void PlayNextBuffer();
 
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
index da094c46b..d72d67994 100644
--- a/src/audio_core/time_stretch.cpp
+++ b/src/audio_core/time_stretch.cpp
@@ -26,7 +26,8 @@ void TimeStretcher::Flush() {
     m_sound_touch.flush();
 }
 
-size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num_out) {
+std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,
+                                   std::size_t num_out) {
     const double time_delta = static_cast<double>(num_out) / m_sample_rate; // seconds
 
     // We were given actual_samples number of samples, and num_samples were requested from us.
@@ -58,11 +59,11 @@ size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num
     m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
     m_sound_touch.setTempo(m_stretch_ratio);
 
-    LOG_DEBUG(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, m_stretch_ratio,
+    LOG_TRACE(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, m_stretch_ratio,
               backlog_fullness);
 
-    m_sound_touch.putSamples(in, num_in);
-    return m_sound_touch.receiveSamples(out, num_out);
+    m_sound_touch.putSamples(in, static_cast<u32>(num_in));
+    return m_sound_touch.receiveSamples(out, static_cast<u32>(num_out));
 }
 
 } // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
index 7e39e695e..decd760f1 100644
--- a/src/audio_core/time_stretch.h
+++ b/src/audio_core/time_stretch.h
@@ -4,7 +4,6 @@
 
 #pragma once
 
-#include <array>
 #include <cstddef>
 #include <SoundTouch.h>
 #include "common/common_types.h"
@@ -20,7 +19,7 @@ public:
     /// @param out      Output sample buffer
     /// @param num_out  Desired number of output frames in `out`
     /// @returns Actual number of frames written to `out`
-    size_t Process(const s16* in, size_t num_in, s16* out, size_t num_out);
+    std::size_t Process(const s16* in, std::size_t num_in, s16* out, std::size_t num_out);
 
     void Clear();
 
diff --git a/src/common/alignment.h b/src/common/alignment.h
index b9dd38746..225770fab 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -8,13 +8,13 @@
 namespace Common {
 
 template <typename T>
-constexpr T AlignUp(T value, size_t size) {
+constexpr T AlignUp(T value, std::size_t size) {
     static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
     return static_cast<T>(value + (size - value % size) % size);
 }
 
 template <typename T>
-constexpr T AlignDown(T value, size_t size) {
+constexpr T AlignDown(T value, std::size_t size) {
     static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
     return static_cast<T>(value - value % size);
 }
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 732201de7..bf803da8d 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -129,8 +129,8 @@ private:
 
 public:
     /// Constants to allow limited introspection of fields if needed
-    static constexpr size_t position = Position;
-    static constexpr size_t bits = Bits;
+    static constexpr std::size_t position = Position;
+    static constexpr std::size_t bits = Bits;
     static constexpr StorageType mask = (((StorageTypeU)~0) >> (8 * sizeof(T) - bits)) << position;
 
     /**
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
index 5a197d8c1..5cd1352b2 100644
--- a/src/common/bit_set.h
+++ b/src/common/bit_set.h
@@ -170,14 +170,14 @@ public:
             m_val |= (IntTy)1 << bit;
     }
 
-    static BitSet AllTrue(size_t count) {
+    static BitSet AllTrue(std::size_t count) {
         return BitSet(count == sizeof(IntTy) * 8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
     }
 
-    Ref operator[](size_t bit) {
+    Ref operator[](std::size_t bit) {
         return Ref(this, (IntTy)1 << bit);
     }
-    const Ref operator[](size_t bit) const {
+    const Ref operator[](std::size_t bit) const {
         return (*const_cast<BitSet*>(this))[bit];
     }
     bool operator==(BitSet other) const {
diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp
index de31ffbd8..4e1d874b5 100644
--- a/src/common/cityhash.cpp
+++ b/src/common/cityhash.cpp
@@ -114,7 +114,7 @@ static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
     return b;
 }
 
-static uint64 HashLen0to16(const char* s, size_t len) {
+static uint64 HashLen0to16(const char* s, std::size_t len) {
     if (len >= 8) {
         uint64 mul = k2 + len * 2;
         uint64 a = Fetch64(s) + k2;
@@ -141,7 +141,7 @@ static uint64 HashLen0to16(const char* s, size_t len) {
 
 // This probably works well for 16-byte strings as well, but it may be overkill
 // in that case.
-static uint64 HashLen17to32(const char* s, size_t len) {
+static uint64 HashLen17to32(const char* s, std::size_t len) {
     uint64 mul = k2 + len * 2;
     uint64 a = Fetch64(s) * k1;
     uint64 b = Fetch64(s + 8);
@@ -170,7 +170,7 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint
 }
 
 // Return an 8-byte hash for 33 to 64 bytes.
-static uint64 HashLen33to64(const char* s, size_t len) {
+static uint64 HashLen33to64(const char* s, std::size_t len) {
     uint64 mul = k2 + len * 2;
     uint64 a = Fetch64(s) * k2;
     uint64 b = Fetch64(s + 8);
@@ -191,7 +191,7 @@ static uint64 HashLen33to64(const char* s, size_t len) {
     return b + x;
 }
 
-uint64 CityHash64(const char* s, size_t len) {
+uint64 CityHash64(const char* s, std::size_t len) {
     if (len <= 32) {
         if (len <= 16) {
             return HashLen0to16(s, len);
@@ -212,7 +212,7 @@ uint64 CityHash64(const char* s, size_t len) {
     x = x * k1 + Fetch64(s);
 
     // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
-    len = (len - 1) & ~static_cast<size_t>(63);
+    len = (len - 1) & ~static_cast<std::size_t>(63);
     do {
         x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
         y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
@@ -229,17 +229,17 @@ uint64 CityHash64(const char* s, size_t len) {
                      HashLen16(v.second, w.second) + x);
 }
 
-uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
+uint64 CityHash64WithSeed(const char* s, std::size_t len, uint64 seed) {
     return CityHash64WithSeeds(s, len, k2, seed);
 }
 
-uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) {
+uint64 CityHash64WithSeeds(const char* s, std::size_t len, uint64 seed0, uint64 seed1) {
     return HashLen16(CityHash64(s, len) - seed0, seed1);
 }
 
 // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
 // of any length representable in signed long.  Based on City and Murmur.
-static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
+static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) {
     uint64 a = Uint128Low64(seed);
     uint64 b = Uint128High64(seed);
     uint64 c = 0;
@@ -269,7 +269,7 @@ static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
     return uint128(a ^ b, HashLen16(b, a));
 }
 
-uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
+uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
     if (len < 128) {
         return CityMurmur(s, len, seed);
     }
@@ -313,7 +313,7 @@ uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
     w.first *= 9;
     v.first *= k0;
     // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
-    for (size_t tail_done = 0; tail_done < len;) {
+    for (std::size_t tail_done = 0; tail_done < len;) {
         tail_done += 32;
         y = Rotate(x + y, 42) * k0 + v.second;
         w.first += Fetch64(s + len - tail_done + 16);
@@ -331,7 +331,7 @@ uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
     return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second));
 }
 
-uint128 CityHash128(const char* s, size_t len) {
+uint128 CityHash128(const char* s, std::size_t len) {
     return len >= 16
                ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0))
                : CityHash128WithSeed(s, len, uint128(k0, k1));
diff --git a/src/common/cityhash.h b/src/common/cityhash.h
index bcebdb150..4b94f8e18 100644
--- a/src/common/cityhash.h
+++ b/src/common/cityhash.h
@@ -63,7 +63,7 @@
 
 #include <utility>
 #include <stdint.h>
-#include <stdlib.h> // for size_t.
+#include <stdlib.h> // for std::size_t.
 
 namespace Common {
 
@@ -77,22 +77,22 @@ inline uint64_t Uint128High64(const uint128& x) {
 }
 
 // Hash function for a byte array.
-uint64_t CityHash64(const char* buf, size_t len);
+uint64_t CityHash64(const char* buf, std::size_t len);
 
 // Hash function for a byte array.  For convenience, a 64-bit seed is also
 // hashed into the result.
-uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed);
+uint64_t CityHash64WithSeed(const char* buf, std::size_t len, uint64_t seed);
 
 // Hash function for a byte array.  For convenience, two seeds are also
 // hashed into the result.
-uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1);
+uint64_t CityHash64WithSeeds(const char* buf, std::size_t len, uint64_t seed0, uint64_t seed1);
 
 // Hash function for a byte array.
-uint128 CityHash128(const char* s, size_t len);
+uint128 CityHash128(const char* s, std::size_t len);
 
 // Hash function for a byte array.  For convenience, a 128-bit seed is also
 // hashed into the result.
-uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed);
+uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed);
 
 // Hash 128 input bits down to 64 bits of output.
 // This is intended to be a reasonably good hash function.
diff --git a/src/common/common_paths.h b/src/common/common_paths.h
index df2ce80b1..4f88de768 100644
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -33,6 +33,8 @@
 #define NAND_DIR "nand"
 #define SYSDATA_DIR "sysdata"
 #define KEYS_DIR "keys"
+#define LOAD_DIR "load"
+#define DUMP_DIR "dump"
 #define LOG_DIR "log"
 
 // Filenames
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index baa721481..548463787 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -76,7 +76,7 @@ namespace FileUtil {
 // Modifies argument.
 static void StripTailDirSlashes(std::string& fname) {
     if (fname.length() > 1) {
-        size_t i = fname.length();
+        std::size_t i = fname.length();
         while (i > 0 && fname[i - 1] == DIR_SEP_CHR)
             --i;
         fname.resize(i);
@@ -201,7 +201,7 @@ bool CreateFullPath(const std::string& fullPath) {
         return true;
     }
 
-    size_t position = 0;
+    std::size_t position = 0;
     while (true) {
         // Find next sub path
         position = fullPath.find(DIR_SEP_CHR, position);
@@ -299,7 +299,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
     std::array<char, 1024> buffer;
     while (!feof(input.get())) {
         // read input
-        size_t rnum = fread(buffer.data(), sizeof(char), buffer.size(), input.get());
+        std::size_t rnum = fread(buffer.data(), sizeof(char), buffer.size(), input.get());
         if (rnum != buffer.size()) {
             if (ferror(input.get()) != 0) {
                 LOG_ERROR(Common_Filesystem, "failed reading from source, {} --> {}: {}",
@@ -309,7 +309,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
         }
 
         // write output
-        size_t wnum = fwrite(buffer.data(), sizeof(char), rnum, output.get());
+        std::size_t wnum = fwrite(buffer.data(), sizeof(char), rnum, output.get());
         if (wnum != rnum) {
             LOG_ERROR(Common_Filesystem, "failed writing to output, {} --> {}: {}", srcFilename,
                       destFilename, GetLastErrorMsg());
@@ -705,6 +705,8 @@ const std::string& GetUserPath(UserPath path, const std::string& new_path) {
 #endif
         paths.emplace(UserPath::SDMCDir, user_path + SDMC_DIR DIR_SEP);
         paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP);
+        paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP);
+        paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP);
         paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP);
         paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP);
         // TODO: Put the logs in a better location for each OS
@@ -756,11 +758,11 @@ std::string GetNANDRegistrationDir(bool system) {
     return GetUserPath(UserPath::NANDDir) + "user/Contents/registered/";
 }
 
-size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename) {
+std::size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename) {
     return FileUtil::IOFile(filename, text_file ? "w" : "wb").WriteBytes(str.data(), str.size());
 }
 
-size_t ReadFileToString(bool text_file, const char* filename, std::string& str) {
+std::size_t ReadFileToString(bool text_file, const char* filename, std::string& str) {
     IOFile file(filename, text_file ? "r" : "rb");
 
     if (!file.IsOpen())
@@ -829,7 +831,7 @@ std::vector<std::string> SplitPathComponents(std::string_view filename) {
 std::string_view GetParentPath(std::string_view path) {
     const auto name_bck_index = path.rfind('\\');
     const auto name_fwd_index = path.rfind('/');
-    size_t name_index;
+    std::size_t name_index;
 
     if (name_bck_index == std::string_view::npos || name_fwd_index == std::string_view::npos) {
         name_index = std::min(name_bck_index, name_fwd_index);
@@ -868,7 +870,7 @@ std::string_view GetFilename(std::string_view path) {
 }
 
 std::string_view GetExtensionFromFilename(std::string_view name) {
-    const size_t index = name.rfind('.');
+    const std::size_t index = name.rfind('.');
 
     if (index == std::string_view::npos) {
         return {};
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 2f13d0b6b..3d8fe6264 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -29,6 +29,8 @@ enum class UserPath {
     NANDDir,
     RootDir,
     SDMCDir,
+    LoadDir,
+    DumpDir,
     SysDataDir,
     UserDir,
 };
@@ -143,8 +145,9 @@ const std::string& GetExeDirectory();
 std::string AppDataRoamingDirectory();
 #endif
 
-size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename);
-size_t ReadFileToString(bool text_file, const char* filename, std::string& str);
+std::size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename);
+
+std::size_t ReadFileToString(bool text_file, const char* filename, std::string& str);
 
 /**
  * Splits the filename into 8.3 format
@@ -177,10 +180,10 @@ std::string_view RemoveTrailingSlash(std::string_view path);
 
 // Creates a new vector containing indices [first, last) from the original.
 template <typename T>
-std::vector<T> SliceVector(const std::vector<T>& vector, size_t first, size_t last) {
+std::vector<T> SliceVector(const std::vector<T>& vector, std::size_t first, std::size_t last) {
     if (first >= last)
         return {};
-    last = std::min<size_t>(last, vector.size());
+    last = std::min<std::size_t>(last, vector.size());
     return std::vector<T>(vector.begin() + first, vector.begin() + first + last);
 }
 
@@ -213,47 +216,47 @@ public:
     bool Close();
 
     template <typename T>
-    size_t ReadArray(T* data, size_t length) const {
+    std::size_t ReadArray(T* data, std::size_t length) const {
         static_assert(std::is_trivially_copyable_v<T>,
                       "Given array does not consist of trivially copyable objects");
 
         if (!IsOpen()) {
-            return std::numeric_limits<size_t>::max();
+            return std::numeric_limits<std::size_t>::max();
         }
 
         return std::fread(data, sizeof(T), length, m_file);
     }
 
     template <typename T>
-    size_t WriteArray(const T* data, size_t length) {
+    std::size_t WriteArray(const T* data, std::size_t length) {
         static_assert(std::is_trivially_copyable_v<T>,
                       "Given array does not consist of trivially copyable objects");
         if (!IsOpen()) {
-            return std::numeric_limits<size_t>::max();
+            return std::numeric_limits<std::size_t>::max();
         }
 
         return std::fwrite(data, sizeof(T), length, m_file);
     }
 
     template <typename T>
-    size_t ReadBytes(T* data, size_t length) const {
+    std::size_t ReadBytes(T* data, std::size_t length) const {
         static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
         return ReadArray(reinterpret_cast<char*>(data), length);
     }
 
     template <typename T>
-    size_t WriteBytes(const T* data, size_t length) {
+    std::size_t WriteBytes(const T* data, std::size_t length) {
         static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
         return WriteArray(reinterpret_cast<const char*>(data), length);
     }
 
     template <typename T>
-    size_t WriteObject(const T& object) {
+    std::size_t WriteObject(const T& object) {
         static_assert(!std::is_pointer_v<T>, "WriteObject arguments must not be a pointer");
         return WriteArray(&object, 1);
     }
 
-    size_t WriteString(const std::string& str) {
+    std::size_t WriteString(const std::string& str) {
         return WriteArray(str.c_str(), str.length());
     }
 
diff --git a/src/common/hash.h b/src/common/hash.h
index 2c761e545..40194d1ee 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -17,7 +17,7 @@ namespace Common {
  * @param len Length of data (in bytes) to compute hash over
  * @returns 64-bit hash value that was computed over the data block
  */
-static inline u64 ComputeHash64(const void* data, size_t len) {
+static inline u64 ComputeHash64(const void* data, std::size_t len) {
     return CityHash64(static_cast<const char*>(data), len);
 }
 
@@ -63,7 +63,7 @@ struct HashableStruct {
         return !(*this == o);
     };
 
-    size_t Hash() const {
+    std::size_t Hash() const {
         return Common::ComputeStructHash64(state);
     }
 };
diff --git a/src/common/hex_util.cpp b/src/common/hex_util.cpp
index 8e0a9e46f..589ae5cbf 100644
--- a/src/common/hex_util.cpp
+++ b/src/common/hex_util.cpp
@@ -18,7 +18,7 @@ u8 ToHexNibble(char c1) {
     return 0;
 }
 
-std::array<u8, 16> operator""_array16(const char* str, size_t len) {
+std::array<u8, 16> operator""_array16(const char* str, std::size_t len) {
     if (len != 32) {
         LOG_ERROR(Common,
                   "Attempting to parse string to array that is not of correct size (expected=32, "
@@ -29,7 +29,7 @@ std::array<u8, 16> operator""_array16(const char* str, size_t len) {
     return HexStringToArray<16>(str);
 }
 
-std::array<u8, 32> operator""_array32(const char* str, size_t len) {
+std::array<u8, 32> operator""_array32(const char* str, std::size_t len) {
     if (len != 64) {
         LOG_ERROR(Common,
                   "Attempting to parse string to array that is not of correct size (expected=64, "
diff --git a/src/common/hex_util.h b/src/common/hex_util.h
index 5fb79bb72..863a5ccd9 100644
--- a/src/common/hex_util.h
+++ b/src/common/hex_util.h
@@ -14,20 +14,20 @@ namespace Common {
 
 u8 ToHexNibble(char c1);
 
-template <size_t Size, bool le = false>
+template <std::size_t Size, bool le = false>
 std::array<u8, Size> HexStringToArray(std::string_view str) {
     std::array<u8, Size> out{};
     if constexpr (le) {
-        for (size_t i = 2 * Size - 2; i <= 2 * Size; i -= 2)
+        for (std::size_t i = 2 * Size - 2; i <= 2 * Size; i -= 2)
             out[i / 2] = (ToHexNibble(str[i]) << 4) | ToHexNibble(str[i + 1]);
     } else {
-        for (size_t i = 0; i < 2 * Size; i += 2)
+        for (std::size_t i = 0; i < 2 * Size; i += 2)
             out[i / 2] = (ToHexNibble(str[i]) << 4) | ToHexNibble(str[i + 1]);
     }
     return out;
 }
 
-template <size_t Size>
+template <std::size_t Size>
 std::string HexArrayToString(std::array<u8, Size> array, bool upper = true) {
     std::string out;
     for (u8 c : array)
@@ -35,7 +35,7 @@ std::string HexArrayToString(std::array<u8, Size> array, bool upper = true) {
     return out;
 }
 
-std::array<u8, 0x10> operator"" _array16(const char* str, size_t len);
-std::array<u8, 0x20> operator"" _array32(const char* str, size_t len);
+std::array<u8, 0x10> operator"" _array16(const char* str, std::size_t len);
+std::array<u8, 0x20> operator"" _array32(const char* str, std::size_t len);
 
 } // namespace Common
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 1323f8d0f..9f5918851 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -135,7 +135,7 @@ FileBackend::FileBackend(const std::string& filename)
 void FileBackend::Write(const Entry& entry) {
     // prevent logs from going over the maximum size (in case its spamming and the user doesn't
     // know)
-    constexpr size_t MAX_BYTES_WRITTEN = 50 * 1024L * 1024L;
+    constexpr std::size_t MAX_BYTES_WRITTEN = 50 * 1024L * 1024L;
     if (!file.IsOpen() || bytes_written > MAX_BYTES_WRITTEN) {
         return;
     }
@@ -183,6 +183,7 @@ void FileBackend::Write(const Entry& entry) {
     SUB(Service, FS)                                                                               \
     SUB(Service, GRC)                                                                              \
     SUB(Service, HID)                                                                              \
+    SUB(Service, IRS)                                                                              \
     SUB(Service, LBL)                                                                              \
     SUB(Service, LDN)                                                                              \
     SUB(Service, LDR)                                                                              \
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index b3f4b9cef..11edbf1b6 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -100,7 +100,7 @@ public:
 
 private:
     FileUtil::IOFile file;
-    size_t bytes_written;
+    std::size_t bytes_written;
 };
 
 void AddBackend(std::unique_ptr<Backend> backend);
diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp
index 2dd331152..2eccbcd8d 100644
--- a/src/common/logging/filter.cpp
+++ b/src/common/logging/filter.cpp
@@ -71,7 +71,7 @@ void Filter::ResetAll(Level level) {
 }
 
 void Filter::SetClassLevel(Class log_class, Level level) {
-    class_levels[static_cast<size_t>(log_class)] = level;
+    class_levels[static_cast<std::size_t>(log_class)] = level;
 }
 
 void Filter::ParseFilterString(std::string_view filter_view) {
@@ -93,7 +93,8 @@ void Filter::ParseFilterString(std::string_view filter_view) {
 }
 
 bool Filter::CheckMessage(Class log_class, Level level) const {
-    return static_cast<u8>(level) >= static_cast<u8>(class_levels[static_cast<size_t>(log_class)]);
+    return static_cast<u8>(level) >=
+           static_cast<u8>(class_levels[static_cast<std::size_t>(log_class)]);
 }
 
 bool Filter::IsDebug() const {
diff --git a/src/common/logging/filter.h b/src/common/logging/filter.h
index f7e3b87c9..773df6f2c 100644
--- a/src/common/logging/filter.h
+++ b/src/common/logging/filter.h
@@ -49,6 +49,6 @@ public:
     bool IsDebug() const;
 
 private:
-    std::array<Level, static_cast<size_t>(Class::Count)> class_levels;
+    std::array<Level, static_cast<std::size_t>(Class::Count)> class_levels;
 };
 } // namespace Log
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 4d577524f..abbd056ee 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -70,6 +70,7 @@ enum class Class : ClassType {
     Service_FS,        ///< The FS (Filesystem) service
     Service_GRC,       ///< The game recording service
     Service_HID,       ///< The HID (Human interface device) service
+    Service_IRS,       ///< The IRS (Infrared sensor) service
     Service_LBL,       ///< The LBL (LCD backlight) service
     Service_LDN,       ///< The LDN (Local domain network) service
     Service_LDR,       ///< The loader service
diff --git a/src/common/memory_util.cpp b/src/common/memory_util.cpp
index 09462ccee..9736fb12a 100644
--- a/src/common/memory_util.cpp
+++ b/src/common/memory_util.cpp
@@ -25,7 +25,7 @@
 // This is purposely not a full wrapper for virtualalloc/mmap, but it
 // provides exactly the primitive operations that Dolphin needs.
 
-void* AllocateExecutableMemory(size_t size, bool low) {
+void* AllocateExecutableMemory(std::size_t size, bool low) {
 #if defined(_WIN32)
     void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
 #else
@@ -74,7 +74,7 @@ void* AllocateExecutableMemory(size_t size, bool low) {
     return ptr;
 }
 
-void* AllocateMemoryPages(size_t size) {
+void* AllocateMemoryPages(std::size_t size) {
 #ifdef _WIN32
     void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
 #else
@@ -90,7 +90,7 @@ void* AllocateMemoryPages(size_t size) {
     return ptr;
 }
 
-void* AllocateAlignedMemory(size_t size, size_t alignment) {
+void* AllocateAlignedMemory(std::size_t size, std::size_t alignment) {
 #ifdef _WIN32
     void* ptr = _aligned_malloc(size, alignment);
 #else
@@ -109,7 +109,7 @@ void* AllocateAlignedMemory(size_t size, size_t alignment) {
     return ptr;
 }
 
-void FreeMemoryPages(void* ptr, size_t size) {
+void FreeMemoryPages(void* ptr, std::size_t size) {
     if (ptr) {
 #ifdef _WIN32
         if (!VirtualFree(ptr, 0, MEM_RELEASE))
@@ -130,7 +130,7 @@ void FreeAlignedMemory(void* ptr) {
     }
 }
 
-void WriteProtectMemory(void* ptr, size_t size, bool allowExecute) {
+void WriteProtectMemory(void* ptr, std::size_t size, bool allowExecute) {
 #ifdef _WIN32
     DWORD oldValue;
     if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
@@ -140,7 +140,7 @@ void WriteProtectMemory(void* ptr, size_t size, bool allowExecute) {
 #endif
 }
 
-void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute) {
+void UnWriteProtectMemory(void* ptr, std::size_t size, bool allowExecute) {
 #ifdef _WIN32
     DWORD oldValue;
     if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE,
diff --git a/src/common/memory_util.h b/src/common/memory_util.h
index 76ca5a30c..aad071979 100644
--- a/src/common/memory_util.h
+++ b/src/common/memory_util.h
@@ -7,13 +7,13 @@
 #include <cstddef>
 #include <string>
 
-void* AllocateExecutableMemory(size_t size, bool low = true);
-void* AllocateMemoryPages(size_t size);
-void FreeMemoryPages(void* ptr, size_t size);
-void* AllocateAlignedMemory(size_t size, size_t alignment);
+void* AllocateExecutableMemory(std::size_t size, bool low = true);
+void* AllocateMemoryPages(std::size_t size);
+void FreeMemoryPages(void* ptr, std::size_t size);
+void* AllocateAlignedMemory(std::size_t size, std::size_t alignment);
 void FreeAlignedMemory(void* ptr);
-void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
-void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
+void WriteProtectMemory(void* ptr, std::size_t size, bool executable = false);
+void UnWriteProtectMemory(void* ptr, std::size_t size, bool allowExecute = false);
 std::string MemUsage();
 
 inline int GetPageSize() {
diff --git a/src/common/misc.cpp b/src/common/misc.cpp
index 3fa8a3bc4..68cb86cd1 100644
--- a/src/common/misc.cpp
+++ b/src/common/misc.cpp
@@ -16,7 +16,7 @@
 // Call directly after the command or use the error num.
 // This function might change the error code.
 std::string GetLastErrorMsg() {
-    static const size_t buff_size = 255;
+    static const std::size_t buff_size = 255;
     char err_str[buff_size];
 
 #ifdef _WIN32
diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h
index 30d934a38..abe3b4dc2 100644
--- a/src/common/ring_buffer.h
+++ b/src/common/ring_buffer.h
@@ -9,6 +9,7 @@
 #include <atomic>
 #include <cstddef>
 #include <cstring>
+#include <new>
 #include <type_traits>
 #include <vector>
 #include "common/common_types.h"
@@ -19,31 +20,31 @@ namespace Common {
 /// @tparam T            Element type
 /// @tparam capacity     Number of slots in ring buffer
 /// @tparam granularity  Slot size in terms of number of elements
-template <typename T, size_t capacity, size_t granularity = 1>
+template <typename T, std::size_t capacity, std::size_t granularity = 1>
 class RingBuffer {
     /// A "slot" is made of `granularity` elements of `T`.
-    static constexpr size_t slot_size = granularity * sizeof(T);
+    static constexpr std::size_t slot_size = granularity * sizeof(T);
     // T must be safely memcpy-able and have a trivial default constructor.
     static_assert(std::is_trivial_v<T>);
     // Ensure capacity is sensible.
-    static_assert(capacity < std::numeric_limits<size_t>::max() / 2 / granularity);
+    static_assert(capacity < std::numeric_limits<std::size_t>::max() / 2 / granularity);
     static_assert((capacity & (capacity - 1)) == 0, "capacity must be a power of two");
     // Ensure lock-free.
-    static_assert(std::atomic<size_t>::is_always_lock_free);
+    static_assert(std::atomic_size_t::is_always_lock_free);
 
 public:
     /// Pushes slots into the ring buffer
     /// @param new_slots   Pointer to the slots to push
     /// @param slot_count  Number of slots to push
     /// @returns The number of slots actually pushed
-    size_t Push(const void* new_slots, size_t slot_count) {
-        const size_t write_index = m_write_index.load();
-        const size_t slots_free = capacity + m_read_index.load() - write_index;
-        const size_t push_count = std::min(slot_count, slots_free);
+    std::size_t Push(const void* new_slots, std::size_t slot_count) {
+        const std::size_t write_index = m_write_index.load();
+        const std::size_t slots_free = capacity + m_read_index.load() - write_index;
+        const std::size_t push_count = std::min(slot_count, slots_free);
 
-        const size_t pos = write_index % capacity;
-        const size_t first_copy = std::min(capacity - pos, push_count);
-        const size_t second_copy = push_count - first_copy;
+        const std::size_t pos = write_index % capacity;
+        const std::size_t first_copy = std::min(capacity - pos, push_count);
+        const std::size_t second_copy = push_count - first_copy;
 
         const char* in = static_cast<const char*>(new_slots);
         std::memcpy(m_data.data() + pos * granularity, in, first_copy * slot_size);
@@ -55,7 +56,7 @@ public:
         return push_count;
     }
 
-    size_t Push(const std::vector<T>& input) {
+    std::size_t Push(const std::vector<T>& input) {
         return Push(input.data(), input.size());
     }
 
@@ -63,14 +64,14 @@ public:
     /// @param output     Where to store the popped slots
     /// @param max_slots  Maximum number of slots to pop
     /// @returns The number of slots actually popped
-    size_t Pop(void* output, size_t max_slots = ~size_t(0)) {
-        const size_t read_index = m_read_index.load();
-        const size_t slots_filled = m_write_index.load() - read_index;
-        const size_t pop_count = std::min(slots_filled, max_slots);
+    std::size_t Pop(void* output, std::size_t max_slots = ~std::size_t(0)) {
+        const std::size_t read_index = m_read_index.load();
+        const std::size_t slots_filled = m_write_index.load() - read_index;
+        const std::size_t pop_count = std::min(slots_filled, max_slots);
 
-        const size_t pos = read_index % capacity;
-        const size_t first_copy = std::min(capacity - pos, pop_count);
-        const size_t second_copy = pop_count - first_copy;
+        const std::size_t pos = read_index % capacity;
+        const std::size_t first_copy = std::min(capacity - pos, pop_count);
+        const std::size_t second_copy = pop_count - first_copy;
 
         char* out = static_cast<char*>(output);
         std::memcpy(out, m_data.data() + pos * granularity, first_copy * slot_size);
@@ -82,28 +83,35 @@ public:
         return pop_count;
     }
 
-    std::vector<T> Pop(size_t max_slots = ~size_t(0)) {
+    std::vector<T> Pop(std::size_t max_slots = ~std::size_t(0)) {
         std::vector<T> out(std::min(max_slots, capacity) * granularity);
-        const size_t count = Pop(out.data(), out.size() / granularity);
+        const std::size_t count = Pop(out.data(), out.size() / granularity);
         out.resize(count * granularity);
         return out;
     }
 
     /// @returns Number of slots used
-    size_t Size() const {
+    std::size_t Size() const {
         return m_write_index.load() - m_read_index.load();
     }
 
     /// @returns Maximum size of ring buffer
-    constexpr size_t Capacity() const {
+    constexpr std::size_t Capacity() const {
         return capacity;
     }
 
 private:
     // It is important to align the below variables for performance reasons:
     // Having them on the same cache-line would result in false-sharing between them.
-    alignas(128) std::atomic<size_t> m_read_index{0};
-    alignas(128) std::atomic<size_t> m_write_index{0};
+    // TODO: Remove this ifdef whenever clang and GCC support
+    //       std::hardware_destructive_interference_size.
+#if defined(_MSC_VER) && _MSC_VER >= 1911
+    alignas(std::hardware_destructive_interference_size) std::atomic_size_t m_read_index{0};
+    alignas(std::hardware_destructive_interference_size) std::atomic_size_t m_write_index{0};
+#else
+    alignas(128) std::atomic_size_t m_read_index{0};
+    alignas(128) std::atomic_size_t m_write_index{0};
+#endif
 
     std::array<T, granularity * capacity> m_data;
 };
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 0ca663032..c9a5425a7 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -37,7 +37,7 @@ std::string ToUpper(std::string str) {
 }
 
 // For Debugging. Read out an u8 array.
-std::string ArrayToString(const u8* data, size_t size, int line_len, bool spaces) {
+std::string ArrayToString(const u8* data, std::size_t size, int line_len, bool spaces) {
     std::ostringstream oss;
     oss << std::setfill('0') << std::hex;
 
@@ -60,7 +60,7 @@ std::string StringFromBuffer(const std::vector<u8>& data) {
 
 // Turns "  hej " into "hej". Also handles tabs.
 std::string StripSpaces(const std::string& str) {
-    const size_t s = str.find_first_not_of(" \t\r\n");
+    const std::size_t s = str.find_first_not_of(" \t\r\n");
 
     if (str.npos != s)
         return str.substr(s, str.find_last_not_of(" \t\r\n") - s + 1);
@@ -121,10 +121,10 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
     if (full_path.empty())
         return false;
 
-    size_t dir_end = full_path.find_last_of("/"
+    std::size_t dir_end = full_path.find_last_of("/"
 // windows needs the : included for something like just "C:" to be considered a directory
 #ifdef _WIN32
-                                            "\\:"
+                                                 "\\:"
 #endif
     );
     if (std::string::npos == dir_end)
@@ -132,7 +132,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
     else
         dir_end += 1;
 
-    size_t fname_end = full_path.rfind('.');
+    std::size_t fname_end = full_path.rfind('.');
     if (fname_end < dir_end || std::string::npos == fname_end)
         fname_end = full_path.size();
 
@@ -172,7 +172,7 @@ void SplitString(const std::string& str, const char delim, std::vector<std::stri
 }
 
 std::string TabsToSpaces(int tab_size, std::string in) {
-    size_t i = 0;
+    std::size_t i = 0;
 
     while ((i = in.find('\t')) != std::string::npos) {
         in.replace(i, 1, tab_size, ' ');
@@ -182,7 +182,7 @@ std::string TabsToSpaces(int tab_size, std::string in) {
 }
 
 std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest) {
-    size_t pos = 0;
+    std::size_t pos = 0;
 
     if (src == dest)
         return result;
@@ -280,22 +280,22 @@ static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>&
         return {};
     }
 
-    const size_t in_bytes = sizeof(T) * input.size();
+    const std::size_t in_bytes = sizeof(T) * input.size();
     // Multiply by 4, which is the max number of bytes to encode a codepoint
-    const size_t out_buffer_size = 4 * in_bytes;
+    const std::size_t out_buffer_size = 4 * in_bytes;
 
     std::string out_buffer(out_buffer_size, '\0');
 
     auto src_buffer = &input[0];
-    size_t src_bytes = in_bytes;
+    std::size_t src_bytes = in_bytes;
     auto dst_buffer = &out_buffer[0];
-    size_t dst_bytes = out_buffer.size();
+    std::size_t dst_bytes = out_buffer.size();
 
     while (0 != src_bytes) {
-        size_t const iconv_result =
+        std::size_t const iconv_result =
             iconv(conv_desc, (char**)(&src_buffer), &src_bytes, &dst_buffer, &dst_bytes);
 
-        if (static_cast<size_t>(-1) == iconv_result) {
+        if (static_cast<std::size_t>(-1) == iconv_result) {
             if (EILSEQ == errno || EINVAL == errno) {
                 // Try to skip the bad character
                 if (0 != src_bytes) {
@@ -326,22 +326,22 @@ std::u16string UTF8ToUTF16(const std::string& input) {
         return {};
     }
 
-    const size_t in_bytes = sizeof(char) * input.size();
+    const std::size_t in_bytes = sizeof(char) * input.size();
     // Multiply by 4, which is the max number of bytes to encode a codepoint
-    const size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes;
+    const std::size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes;
 
     std::u16string out_buffer(out_buffer_size, char16_t{});
 
     char* src_buffer = const_cast<char*>(&input[0]);
-    size_t src_bytes = in_bytes;
+    std::size_t src_bytes = in_bytes;
     char* dst_buffer = (char*)(&out_buffer[0]);
-    size_t dst_bytes = out_buffer.size();
+    std::size_t dst_bytes = out_buffer.size();
 
     while (0 != src_bytes) {
-        size_t const iconv_result =
+        std::size_t const iconv_result =
             iconv(conv_desc, &src_buffer, &src_bytes, &dst_buffer, &dst_bytes);
 
-        if (static_cast<size_t>(-1) == iconv_result) {
+        if (static_cast<std::size_t>(-1) == iconv_result) {
             if (EILSEQ == errno || EINVAL == errno) {
                 // Try to skip the bad character
                 if (0 != src_bytes) {
@@ -381,8 +381,8 @@ std::string SHIFTJISToUTF8(const std::string& input) {
 
 #endif
 
-std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, size_t max_len) {
-    size_t len = 0;
+std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len) {
+    std::size_t len = 0;
     while (len < max_len && buffer[len] != '\0')
         ++len;
 
diff --git a/src/common/string_util.h b/src/common/string_util.h
index 4a2143b59..dcca6bc38 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -19,7 +19,7 @@ std::string ToLower(std::string str);
 /// Make a string uppercase
 std::string ToUpper(std::string str);
 
-std::string ArrayToString(const u8* data, size_t size, int line_len = 20, bool spaces = true);
+std::string ArrayToString(const u8* data, std::size_t size, int line_len = 20, bool spaces = true);
 
 std::string StringFromBuffer(const std::vector<u8>& data);
 
@@ -118,7 +118,7 @@ bool ComparePartialString(InIt begin, InIt end, const char* other) {
  * Creates a std::string from a fixed-size NUL-terminated char buffer. If the buffer isn't
  * NUL-terminated then the string ends at max_len characters.
  */
-std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, size_t max_len);
+std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len);
 
 /**
  * Attempts to trim an arbitrary prefix from `path`, leaving only the part starting at `root`. It's
diff --git a/src/common/thread.h b/src/common/thread.h
index 9465e1de7..6cbdb96a3 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -60,12 +60,12 @@ private:
 
 class Barrier {
 public:
-    explicit Barrier(size_t count_) : count(count_), waiting(0), generation(0) {}
+    explicit Barrier(std::size_t count_) : count(count_), waiting(0), generation(0) {}
 
     /// Blocks until all "count" threads have called Sync()
     void Sync() {
         std::unique_lock<std::mutex> lk(mutex);
-        const size_t current_generation = generation;
+        const std::size_t current_generation = generation;
 
         if (++waiting == count) {
             generation++;
@@ -80,21 +80,13 @@ public:
 private:
     std::condition_variable condvar;
     std::mutex mutex;
-    const size_t count;
-    size_t waiting;
-    size_t generation; // Incremented once each time the barrier is used
+    const std::size_t count;
+    std::size_t waiting;
+    std::size_t generation; // Incremented once each time the barrier is used
 };
 
 void SleepCurrentThread(int ms);
 void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
-
-// Use this function during a spin-wait to make the current thread
-// relax while another thread is working. This may be more efficient
-// than using events because event functions use kernel calls.
-inline void YieldCPU() {
-    std::this_thread::yield();
-}
-
 void SetCurrentThreadName(const char* name);
 
 } // namespace Common
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 927da9187..636a5c0f9 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -97,7 +97,7 @@ const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
     Xbyak::util::xmm15,
 });
 
-constexpr size_t ABI_SHADOW_SPACE = 0x20;
+constexpr std::size_t ABI_SHADOW_SPACE = 0x20;
 
 #else
 
@@ -147,22 +147,23 @@ const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
     Xbyak::util::r15,
 });
 
-constexpr size_t ABI_SHADOW_SPACE = 0;
+constexpr std::size_t ABI_SHADOW_SPACE = 0;
 
 #endif
 
-inline void ABI_CalculateFrameSize(BitSet32 regs, size_t rsp_alignment, size_t needed_frame_size,
-                                   s32* out_subtraction, s32* out_xmm_offset) {
+inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment,
+                                   std::size_t needed_frame_size, s32* out_subtraction,
+                                   s32* out_xmm_offset) {
     int count = (regs & ABI_ALL_GPRS).Count();
     rsp_alignment -= count * 8;
-    size_t subtraction = 0;
+    std::size_t subtraction = 0;
     int xmm_count = (regs & ABI_ALL_XMMS).Count();
     if (xmm_count) {
         // If we have any XMMs to save, we must align the stack here.
         subtraction = rsp_alignment & 0xF;
     }
     subtraction += 0x10 * xmm_count;
-    size_t xmm_base_subtraction = subtraction;
+    std::size_t xmm_base_subtraction = subtraction;
     subtraction += needed_frame_size;
     subtraction += ABI_SHADOW_SPACE;
     // Final alignment.
@@ -173,8 +174,9 @@ inline void ABI_CalculateFrameSize(BitSet32 regs, size_t rsp_alignment, size_t n
     *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
 }
 
-inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
-                                              size_t rsp_alignment, size_t needed_frame_size = 0) {
+inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
+                                                   std::size_t rsp_alignment,
+                                                   std::size_t needed_frame_size = 0) {
     s32 subtraction, xmm_offset;
     ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
 
@@ -195,7 +197,8 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet
 }
 
 inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
-                                           size_t rsp_alignment, size_t needed_frame_size = 0) {
+                                           std::size_t rsp_alignment,
+                                           std::size_t needed_frame_size = 0) {
     s32 subtraction, xmm_offset;
     ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
 
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
index 02323a017..5cc8a8c76 100644
--- a/src/common/x64/xbyak_util.h
+++ b/src/common/x64/xbyak_util.h
@@ -34,7 +34,7 @@ inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
 template <typename T>
 inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
     static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
-    size_t addr = reinterpret_cast<size_t>(f);
+    std::size_t addr = reinterpret_cast<std::size_t>(f);
     if (IsWithin2G(code, addr)) {
         code.call(f);
     } else {
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 26f727d96..23fd6e920 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -32,6 +32,8 @@ add_library(core STATIC
     file_sys/control_metadata.h
     file_sys/directory.h
     file_sys/errors.h
+    file_sys/fsmitm_romfsbuild.cpp
+    file_sys/fsmitm_romfsbuild.h
     file_sys/mode.h
     file_sys/nca_metadata.cpp
     file_sys/nca_metadata.h
@@ -59,10 +61,13 @@ add_library(core STATIC
     file_sys/vfs.h
     file_sys/vfs_concat.cpp
     file_sys/vfs_concat.h
+    file_sys/vfs_layered.cpp
+    file_sys/vfs_layered.h
     file_sys/vfs_offset.cpp
     file_sys/vfs_offset.h
     file_sys/vfs_real.cpp
     file_sys/vfs_real.h
+    file_sys/vfs_static.h
     file_sys/vfs_vector.cpp
     file_sys/vfs_vector.h
     file_sys/xts_archive.cpp
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index c368745b1..59da33f30 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -6,11 +6,14 @@
 
 #include <array>
 #include "common/common_types.h"
-#include "core/hle/kernel/vm_manager.h"
+
+namespace Kernel {
+enum class VMAPermission : u8;
+}
 
 namespace Core {
 
-/// Generic ARM11 CPU interface
+/// Generic ARMv8 CPU interface
 class ARM_Interface : NonCopyable {
 public:
     virtual ~ARM_Interface() {}
@@ -19,10 +22,16 @@ public:
         std::array<u64, 31> cpu_registers;
         u64 sp;
         u64 pc;
-        u64 cpsr;
-        std::array<u128, 32> fpu_registers;
-        u64 fpscr;
+        u32 pstate;
+        std::array<u8, 4> padding;
+        std::array<u128, 32> vector_registers;
+        u32 fpcr;
+        u32 fpsr;
+        u64 tpidr;
     };
+    // The kernel internally expects the AArch64 version of the thread
+    // context to be exactly 800 (0x320) bytes in size.
+    static_assert(sizeof(ThreadContext) == 0x320);
 
     /// Runs the CPU until an event happens
     virtual void Run() = 0;
@@ -31,11 +40,11 @@ public:
     virtual void Step() = 0;
 
     /// Maps a backing memory region for the CPU
-    virtual void MapBackingMemory(VAddr address, size_t size, u8* memory,
+    virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                                   Kernel::VMAPermission perms) = 0;
 
     /// Unmaps a region of memory that was previously mapped using MapBackingMemory
-    virtual void UnmapMemory(VAddr address, size_t size) = 0;
+    virtual void UnmapMemory(VAddr address, std::size_t size) = 0;
 
     /// Clear all instruction cache
     virtual void ClearInstructionCache() = 0;
@@ -69,42 +78,50 @@ public:
      */
     virtual void SetReg(int index, u64 value) = 0;
 
-    virtual u128 GetExtReg(int index) const = 0;
-
-    virtual void SetExtReg(int index, u128 value) = 0;
-
     /**
-     * Gets the value of a VFP register
-     * @param index Register index (0-31)
-     * @return Returns the value in the register
+     * Gets the value of a specified vector register.
+     *
+     * @param index The index of the vector register (0-31).
+     * @return the value within the vector register.
      */
-    virtual u32 GetVFPReg(int index) const = 0;
+    virtual u128 GetVectorReg(int index) const = 0;
 
     /**
-     * Sets a VFP register to the given value
-     * @param index Register index (0-31)
-     * @param value Value to set register to
+     * Sets a given value into a vector register.
+     *
+     * @param index The index of the vector register (0-31).
+     * @param value The new value to place in the register.
      */
-    virtual void SetVFPReg(int index, u32 value) = 0;
+    virtual void SetVectorReg(int index, u128 value) = 0;
 
     /**
-     * Get the current CPSR register
-     * @return Returns the value of the CPSR register
+     * Get the current PSTATE register
+     * @return Returns the value of the PSTATE register
      */
-    virtual u32 GetCPSR() const = 0;
+    virtual u32 GetPSTATE() const = 0;
 
     /**
-     * Set the current CPSR register
-     * @param cpsr Value to set CPSR to
+     * Set the current PSTATE register
+     * @param pstate Value to set PSTATE to
      */
-    virtual void SetCPSR(u32 cpsr) = 0;
+    virtual void SetPSTATE(u32 pstate) = 0;
 
     virtual VAddr GetTlsAddress() const = 0;
 
     virtual void SetTlsAddress(VAddr address) = 0;
 
+    /**
+     * Gets the value within the TPIDR_EL0 (read/write software thread ID) register.
+     *
+     * @return the value within the register.
+     */
     virtual u64 GetTPIDR_EL0() const = 0;
 
+    /**
+     * Sets a new value within the TPIDR_EL0 (read/write software thread ID) register.
+     *
+     * @param value The new value to place in the register.
+     */
     virtual void SetTPIDR_EL0(u64 value) = 0;
 
     /**
@@ -119,6 +136,7 @@ public:
      */
     virtual void LoadContext(const ThreadContext& ctx) = 0;
 
+    /// Clears the exclusive monitor's state.
     virtual void ClearExclusiveState() = 0;
 
     /// Prepare core for thread reschedule (if needed to correctly handle state)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index b47f04988..05cc84458 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,8 +12,10 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
+#include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
+#include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
 
 namespace Core {
@@ -58,7 +60,7 @@ public:
         Memory::Write64(vaddr + 8, value[1]);
     }
 
-    void InterpreterFallback(u64 pc, size_t num_instructions) override {
+    void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
         LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,
                  num_instructions, MemoryReadCode(pc));
 
@@ -79,9 +81,20 @@ public:
         case Dynarmic::A64::Exception::SendEventLocal:
         case Dynarmic::A64::Exception::Yield:
             return;
+        case Dynarmic::A64::Exception::Breakpoint:
+            if (GDBStub::IsServerEnabled()) {
+                parent.jit->HaltExecution();
+                parent.SetPC(pc);
+                Kernel::Thread* thread = Kernel::GetCurrentThread();
+                parent.SaveContext(thread->context);
+                GDBStub::Break();
+                GDBStub::SendTrap(thread, 5);
+                return;
+            }
+            [[fallthrough]];
         default:
             ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})",
-                       static_cast<size_t>(exception), pc);
+                       static_cast<std::size_t>(exception), pc);
         }
     }
 
@@ -110,13 +123,14 @@ public:
     }
 
     ARM_Dynarmic& parent;
-    size_t num_interpreted_instructions = 0;
+    std::size_t num_interpreted_instructions = 0;
     u64 tpidrro_el0 = 0;
     u64 tpidr_el0 = 0;
 };
 
 std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
-    auto** const page_table = Core::CurrentProcess()->vm_manager.page_table.pointers.data();
+    auto& current_process = Core::CurrentProcess();
+    auto** const page_table = current_process->VMManager().page_table.pointers.data();
 
     Dynarmic::A64::UserConfig config;
 
@@ -125,7 +139,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
 
     // Memory
     config.page_table = reinterpret_cast<void**>(page_table);
-    config.page_table_address_space_bits = Memory::ADDRESS_SPACE_BITS;
+    config.page_table_address_space_bits = current_process->VMManager().GetAddressSpaceWidth();
     config.silently_mirror_page_table = false;
 
     // Multi-process state
@@ -157,10 +171,11 @@ void ARM_Dynarmic::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic::ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor, size_t core_index)
+ARM_Dynarmic::ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
+                           std::size_t core_index)
     : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index},
       exclusive_monitor{std::dynamic_pointer_cast<DynarmicExclusiveMonitor>(exclusive_monitor)} {
-    ThreadContext ctx;
+    ThreadContext ctx{};
     inner_unicorn.SaveContext(ctx);
     PageTableChanged();
     LoadContext(ctx);
@@ -168,12 +183,12 @@ ARM_Dynarmic::ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
 
 ARM_Dynarmic::~ARM_Dynarmic() = default;
 
-void ARM_Dynarmic::MapBackingMemory(u64 address, size_t size, u8* memory,
+void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory,
                                     Kernel::VMAPermission perms) {
     inner_unicorn.MapBackingMemory(address, size, memory, perms);
 }
 
-void ARM_Dynarmic::UnmapMemory(u64 address, size_t size) {
+void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) {
     inner_unicorn.UnmapMemory(address, size);
 }
 
@@ -193,29 +208,20 @@ void ARM_Dynarmic::SetReg(int index, u64 value) {
     jit->SetRegister(index, value);
 }
 
-u128 ARM_Dynarmic::GetExtReg(int index) const {
+u128 ARM_Dynarmic::GetVectorReg(int index) const {
     return jit->GetVector(index);
 }
 
-void ARM_Dynarmic::SetExtReg(int index, u128 value) {
+void ARM_Dynarmic::SetVectorReg(int index, u128 value) {
     jit->SetVector(index, value);
 }
 
-u32 ARM_Dynarmic::GetVFPReg(int /*index*/) const {
-    UNIMPLEMENTED();
-    return {};
-}
-
-void ARM_Dynarmic::SetVFPReg(int /*index*/, u32 /*value*/) {
-    UNIMPLEMENTED();
-}
-
-u32 ARM_Dynarmic::GetCPSR() const {
+u32 ARM_Dynarmic::GetPSTATE() const {
     return jit->GetPstate();
 }
 
-void ARM_Dynarmic::SetCPSR(u32 cpsr) {
-    jit->SetPstate(cpsr);
+void ARM_Dynarmic::SetPSTATE(u32 pstate) {
+    jit->SetPstate(pstate);
 }
 
 u64 ARM_Dynarmic::GetTlsAddress() const {
@@ -238,18 +244,22 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) {
     ctx.cpu_registers = jit->GetRegisters();
     ctx.sp = jit->GetSP();
     ctx.pc = jit->GetPC();
-    ctx.cpsr = jit->GetPstate();
-    ctx.fpu_registers = jit->GetVectors();
-    ctx.fpscr = jit->GetFpcr();
+    ctx.pstate = jit->GetPstate();
+    ctx.vector_registers = jit->GetVectors();
+    ctx.fpcr = jit->GetFpcr();
+    ctx.fpsr = jit->GetFpsr();
+    ctx.tpidr = cb->tpidr_el0;
 }
 
 void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
     jit->SetRegisters(ctx.cpu_registers);
     jit->SetSP(ctx.sp);
     jit->SetPC(ctx.pc);
-    jit->SetPstate(static_cast<u32>(ctx.cpsr));
-    jit->SetVectors(ctx.fpu_registers);
-    jit->SetFpcr(static_cast<u32>(ctx.fpscr));
+    jit->SetPstate(ctx.pstate);
+    jit->SetVectors(ctx.vector_registers);
+    jit->SetFpcr(ctx.fpcr);
+    jit->SetFpsr(ctx.fpsr);
+    SetTPIDR_EL0(ctx.tpidr);
 }
 
 void ARM_Dynarmic::PrepareReschedule() {
@@ -269,10 +279,10 @@ void ARM_Dynarmic::PageTableChanged() {
     current_page_table = Memory::GetCurrentPageTable();
 }
 
-DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(size_t core_count) : monitor(core_count) {}
+DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
 DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
 
-void DynarmicExclusiveMonitor::SetExclusive(size_t core_index, VAddr addr) {
+void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) {
     // Size doesn't actually matter.
     monitor.Mark(core_index, addr, 16);
 }
@@ -281,30 +291,30 @@ void DynarmicExclusiveMonitor::ClearExclusive() {
     monitor.Clear();
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite8(size_t core_index, VAddr vaddr, u8 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 1,
                                         [&] { Memory::Write8(vaddr, value); });
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite16(size_t core_index, VAddr vaddr, u16 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 2,
                                         [&] { Memory::Write16(vaddr, value); });
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite32(size_t core_index, VAddr vaddr, u32 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 4,
                                         [&] { Memory::Write32(vaddr, value); });
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite64(size_t core_index, VAddr vaddr, u64 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 8,
                                         [&] { Memory::Write64(vaddr, value); });
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite128(size_t core_index, VAddr vaddr, u128 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] {
-        Memory::Write64(vaddr, value[0]);
-        Memory::Write64(vaddr, value[1]);
+        Memory::Write64(vaddr + 0, value[0]);
+        Memory::Write64(vaddr + 8, value[1]);
     });
 }
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 3bdfd8cd9..4ee92ee27 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,6 +12,10 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 
+namespace Memory {
+struct PageTable;
+}
+
 namespace Core {
 
 class ARM_Dynarmic_Callbacks;
@@ -19,24 +23,22 @@ class DynarmicExclusiveMonitor;
 
 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor, size_t core_index);
+    ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic();
 
-    void MapBackingMemory(VAddr address, size_t size, u8* memory,
+    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
-    void UnmapMemory(u64 address, size_t size) override;
+    void UnmapMemory(u64 address, std::size_t size) override;
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
     u64 GetReg(int index) const override;
     void SetReg(int index, u64 value) override;
-    u128 GetExtReg(int index) const override;
-    void SetExtReg(int index, u128 value) override;
-    u32 GetVFPReg(int index) const override;
-    void SetVFPReg(int index, u32 value) override;
-    u32 GetCPSR() const override;
+    u128 GetVectorReg(int index) const override;
+    void SetVectorReg(int index, u128 value) override;
+    u32 GetPSTATE() const override;
+    void SetPSTATE(u32 pstate) override;
     void Run() override;
     void Step() override;
-    void SetCPSR(u32 cpsr) override;
     VAddr GetTlsAddress() const override;
     void SetTlsAddress(VAddr address) override;
     void SetTPIDR_EL0(u64 value) override;
@@ -59,7 +61,7 @@ private:
     std::unique_ptr<Dynarmic::A64::Jit> jit;
     ARM_Unicorn inner_unicorn;
 
-    size_t core_index;
+    std::size_t core_index;
     std::shared_ptr<DynarmicExclusiveMonitor> exclusive_monitor;
 
     Memory::PageTable* current_page_table = nullptr;
@@ -67,17 +69,17 @@ private:
 
 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
 public:
-    explicit DynarmicExclusiveMonitor(size_t core_count);
+    explicit DynarmicExclusiveMonitor(std::size_t core_count);
     ~DynarmicExclusiveMonitor();
 
-    void SetExclusive(size_t core_index, VAddr addr) override;
+    void SetExclusive(std::size_t core_index, VAddr addr) override;
     void ClearExclusive() override;
 
-    bool ExclusiveWrite8(size_t core_index, VAddr vaddr, u8 value) override;
-    bool ExclusiveWrite16(size_t core_index, VAddr vaddr, u16 value) override;
-    bool ExclusiveWrite32(size_t core_index, VAddr vaddr, u32 value) override;
-    bool ExclusiveWrite64(size_t core_index, VAddr vaddr, u64 value) override;
-    bool ExclusiveWrite128(size_t core_index, VAddr vaddr, u128 value) override;
+    bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
+    bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
+    bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
+    bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
+    bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;
 
 private:
     friend class ARM_Dynarmic;
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index 6f9b51573..f59aca667 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -12,14 +12,14 @@ class ExclusiveMonitor {
 public:
     virtual ~ExclusiveMonitor();
 
-    virtual void SetExclusive(size_t core_index, VAddr addr) = 0;
+    virtual void SetExclusive(std::size_t core_index, VAddr addr) = 0;
     virtual void ClearExclusive() = 0;
 
-    virtual bool ExclusiveWrite8(size_t core_index, VAddr vaddr, u8 value) = 0;
-    virtual bool ExclusiveWrite16(size_t core_index, VAddr vaddr, u16 value) = 0;
-    virtual bool ExclusiveWrite32(size_t core_index, VAddr vaddr, u32 value) = 0;
-    virtual bool ExclusiveWrite64(size_t core_index, VAddr vaddr, u64 value) = 0;
-    virtual bool ExclusiveWrite128(size_t core_index, VAddr vaddr, u128 value) = 0;
+    virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
+    virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
+    virtual bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) = 0;
+    virtual bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) = 0;
+    virtual bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) = 0;
 };
 
 } // namespace Core
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 4c4de2623..e218a0b15 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -90,12 +90,12 @@ ARM_Unicorn::~ARM_Unicorn() {
     CHECKED(uc_close(uc));
 }
 
-void ARM_Unicorn::MapBackingMemory(VAddr address, size_t size, u8* memory,
+void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                                    Kernel::VMAPermission perms) {
     CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory));
 }
 
-void ARM_Unicorn::UnmapMemory(VAddr address, size_t size) {
+void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) {
     CHECKED(uc_mem_unmap(uc, address, size));
 }
 
@@ -131,33 +131,24 @@ void ARM_Unicorn::SetReg(int regn, u64 val) {
     CHECKED(uc_reg_write(uc, treg, &val));
 }
 
-u128 ARM_Unicorn::GetExtReg(int /*index*/) const {
+u128 ARM_Unicorn::GetVectorReg(int /*index*/) const {
     UNIMPLEMENTED();
     static constexpr u128 res{};
     return res;
 }
 
-void ARM_Unicorn::SetExtReg(int /*index*/, u128 /*value*/) {
+void ARM_Unicorn::SetVectorReg(int /*index*/, u128 /*value*/) {
     UNIMPLEMENTED();
 }
 
-u32 ARM_Unicorn::GetVFPReg(int /*index*/) const {
-    UNIMPLEMENTED();
-    return {};
-}
-
-void ARM_Unicorn::SetVFPReg(int /*index*/, u32 /*value*/) {
-    UNIMPLEMENTED();
-}
-
-u32 ARM_Unicorn::GetCPSR() const {
+u32 ARM_Unicorn::GetPSTATE() const {
     u64 nzcv{};
     CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &nzcv));
     return static_cast<u32>(nzcv);
 }
 
-void ARM_Unicorn::SetCPSR(u32 cpsr) {
-    u64 nzcv = cpsr;
+void ARM_Unicorn::SetPSTATE(u32 pstate) {
+    u64 nzcv = pstate;
     CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &nzcv));
 }
 
@@ -219,7 +210,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
 
     CHECKED(uc_reg_read(uc, UC_ARM64_REG_SP, &ctx.sp));
     CHECKED(uc_reg_read(uc, UC_ARM64_REG_PC, &ctx.pc));
-    CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.cpsr));
+    CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.pstate));
 
     for (auto i = 0; i < 29; ++i) {
         uregs[i] = UC_ARM64_REG_X0 + i;
@@ -234,7 +225,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
 
     for (int i = 0; i < 32; ++i) {
         uregs[i] = UC_ARM64_REG_Q0 + i;
-        tregs[i] = &ctx.fpu_registers[i];
+        tregs[i] = &ctx.vector_registers[i];
     }
 
     CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32));
@@ -246,7 +237,7 @@ void ARM_Unicorn::LoadContext(const ThreadContext& ctx) {
 
     CHECKED(uc_reg_write(uc, UC_ARM64_REG_SP, &ctx.sp));
     CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &ctx.pc));
-    CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.cpsr));
+    CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.pstate));
 
     for (int i = 0; i < 29; ++i) {
         uregs[i] = UC_ARM64_REG_X0 + i;
@@ -261,7 +252,7 @@ void ARM_Unicorn::LoadContext(const ThreadContext& ctx) {
 
     for (auto i = 0; i < 32; ++i) {
         uregs[i] = UC_ARM64_REG_Q0 + i;
-        tregs[i] = (void*)&ctx.fpu_registers[i];
+        tregs[i] = (void*)&ctx.vector_registers[i];
     }
 
     CHECKED(uc_reg_write_batch(uc, uregs, tregs, 32));
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index bd6b2f723..75761950b 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -15,19 +15,17 @@ class ARM_Unicorn final : public ARM_Interface {
 public:
     ARM_Unicorn();
     ~ARM_Unicorn();
-    void MapBackingMemory(VAddr address, size_t size, u8* memory,
+    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
-    void UnmapMemory(VAddr address, size_t size) override;
+    void UnmapMemory(VAddr address, std::size_t size) override;
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
     u64 GetReg(int index) const override;
     void SetReg(int index, u64 value) override;
-    u128 GetExtReg(int index) const override;
-    void SetExtReg(int index, u128 value) override;
-    u32 GetVFPReg(int index) const override;
-    void SetVFPReg(int index, u32 value) override;
-    u32 GetCPSR() const override;
-    void SetCPSR(u32 cpsr) override;
+    u128 GetVectorReg(int index) const override;
+    void SetVectorReg(int index, u128 value) override;
+    u32 GetPSTATE() const override;
+    void SetPSTATE(u32 pstate) override;
     VAddr GetTlsAddress() const override;
     void SetTlsAddress(VAddr address) override;
     void SetTPIDR_EL0(u64 value) override;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 713ee17c1..b6acfb3e4 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -64,7 +64,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
         if (concat.empty())
             return nullptr;
 
-        return FileSys::ConcatenateFiles(concat, dir->GetName());
+        return FileSys::ConcatenatedVfsFile::MakeConcatenatedFile(concat, dir->GetName());
     }
 
     return vfs->OpenFile(path, FileSys::Mode::Read);
@@ -140,7 +140,7 @@ struct System::Impl {
 
         cpu_barrier = std::make_shared<CpuBarrier>();
         cpu_exclusive_monitor = Cpu::MakeExclusiveMonitor(cpu_cores.size());
-        for (size_t index = 0; index < cpu_cores.size(); ++index) {
+        for (std::size_t index = 0; index < cpu_cores.size(); ++index) {
             cpu_cores[index] = std::make_shared<Cpu>(cpu_exclusive_monitor, cpu_barrier, index);
         }
 
@@ -161,7 +161,7 @@ struct System::Impl {
         // CPU core 0 is run on the main thread
         thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
         if (Settings::values.use_multi_core) {
-            for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
+            for (std::size_t index = 0; index < cpu_core_threads.size(); ++index) {
                 cpu_core_threads[index] =
                     std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
                 thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
@@ -202,7 +202,7 @@ struct System::Impl {
             return init_result;
         }
 
-        const Loader::ResultStatus load_result{app_loader->Load(kernel.CurrentProcess())};
+        const Loader::ResultStatus load_result{app_loader->Load(*kernel.CurrentProcess())};
         if (load_result != Loader::ResultStatus::Success) {
             LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result));
             Shutdown();
@@ -285,7 +285,7 @@ struct System::Impl {
     std::shared_ptr<CpuBarrier> cpu_barrier;
     std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
     std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
-    size_t active_core{}; ///< Active core, only used in single thread mode
+    std::size_t active_core{}; ///< Active core, only used in single thread mode
 
     /// Service manager
     std::shared_ptr<Service::SM::ServiceManager> service_manager;
@@ -348,7 +348,7 @@ ARM_Interface& System::CurrentArmInterface() {
     return CurrentCpuCore().ArmInterface();
 }
 
-size_t System::CurrentCoreIndex() {
+std::size_t System::CurrentCoreIndex() {
     return CurrentCpuCore().CoreIndex();
 }
 
@@ -356,7 +356,7 @@ Kernel::Scheduler& System::CurrentScheduler() {
     return *CurrentCpuCore().Scheduler();
 }
 
-const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(size_t core_index) {
+const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(std::size_t core_index) {
     ASSERT(core_index < NUM_CPU_CORES);
     return impl->cpu_cores[core_index]->Scheduler();
 }
@@ -369,12 +369,12 @@ const Kernel::SharedPtr<Kernel::Process>& System::CurrentProcess() const {
     return impl->kernel.CurrentProcess();
 }
 
-ARM_Interface& System::ArmInterface(size_t core_index) {
+ARM_Interface& System::ArmInterface(std::size_t core_index) {
     ASSERT(core_index < NUM_CPU_CORES);
     return impl->cpu_cores[core_index]->ArmInterface();
 }
 
-Cpu& System::CpuCore(size_t core_index) {
+Cpu& System::CpuCore(std::size_t core_index) {
     ASSERT(core_index < NUM_CPU_CORES);
     return *impl->cpu_cores[core_index];
 }
diff --git a/src/core/core.h b/src/core/core.h
index ab3663427..f9a3e97e3 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -145,16 +145,16 @@ public:
     ARM_Interface& CurrentArmInterface();
 
     /// Gets the index of the currently running CPU core
-    size_t CurrentCoreIndex();
+    std::size_t CurrentCoreIndex();
 
     /// Gets the scheduler for the CPU core that is currently running
     Kernel::Scheduler& CurrentScheduler();
 
     /// Gets an ARM interface to the CPU core with the specified index
-    ARM_Interface& ArmInterface(size_t core_index);
+    ARM_Interface& ArmInterface(std::size_t core_index);
 
     /// Gets a CPU interface to the CPU core with the specified index
-    Cpu& CpuCore(size_t core_index);
+    Cpu& CpuCore(std::size_t core_index);
 
     /// Gets the exclusive monitor
     ExclusiveMonitor& Monitor();
@@ -172,7 +172,7 @@ public:
     const VideoCore::RendererBase& Renderer() const;
 
     /// Gets the scheduler for the CPU core with the specified index
-    const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index);
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler(std::size_t core_index);
 
     /// Provides a reference to the current process
     Kernel::SharedPtr<Kernel::Process>& CurrentProcess();
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index b042ee02b..265f8ed9c 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -9,6 +9,7 @@
 #ifdef ARCHITECTURE_x86_64
 #include "core/arm/dynarmic/arm_dynarmic.h"
 #endif
+#include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
@@ -49,24 +50,26 @@ bool CpuBarrier::Rendezvous() {
 }
 
 Cpu::Cpu(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
-         std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
+         std::shared_ptr<CpuBarrier> cpu_barrier, std::size_t core_index)
     : cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} {
 
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
-        arm_interface = std::make_shared<ARM_Dynarmic>(exclusive_monitor, core_index);
+        arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index);
 #else
-        arm_interface = std::make_shared<ARM_Unicorn>();
+        arm_interface = std::make_unique<ARM_Unicorn>();
         LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
     } else {
-        arm_interface = std::make_shared<ARM_Unicorn>();
+        arm_interface = std::make_unique<ARM_Unicorn>();
     }
 
-    scheduler = std::make_shared<Kernel::Scheduler>(arm_interface.get());
+    scheduler = std::make_shared<Kernel::Scheduler>(*arm_interface);
 }
 
-std::shared_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(size_t num_cores) {
+Cpu::~Cpu() = default;
+
+std::shared_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) {
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
         return std::make_shared<DynarmicExclusiveMonitor>(num_cores);
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 40ed34b47..ee7e04abc 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -6,11 +6,10 @@
 
 #include <atomic>
 #include <condition_variable>
+#include <cstddef>
 #include <memory>
 #include <mutex>
-#include <string>
 #include "common/common_types.h"
-#include "core/arm/exclusive_monitor.h"
 
 namespace Kernel {
 class Scheduler;
@@ -19,6 +18,7 @@ class Scheduler;
 namespace Core {
 
 class ARM_Interface;
+class ExclusiveMonitor;
 
 constexpr unsigned NUM_CPU_CORES{4};
 
@@ -42,7 +42,8 @@ private:
 class Cpu {
 public:
     Cpu(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
-        std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index);
+        std::shared_ptr<CpuBarrier> cpu_barrier, std::size_t core_index);
+    ~Cpu();
 
     void RunLoop(bool tight_loop = true);
 
@@ -66,21 +67,21 @@ public:
         return core_index == 0;
     }
 
-    size_t CoreIndex() const {
+    std::size_t CoreIndex() const {
         return core_index;
     }
 
-    static std::shared_ptr<ExclusiveMonitor> MakeExclusiveMonitor(size_t num_cores);
+    static std::shared_ptr<ExclusiveMonitor> MakeExclusiveMonitor(std::size_t num_cores);
 
 private:
     void Reschedule();
 
-    std::shared_ptr<ARM_Interface> arm_interface;
+    std::unique_ptr<ARM_Interface> arm_interface;
     std::shared_ptr<CpuBarrier> cpu_barrier;
     std::shared_ptr<Kernel::Scheduler> scheduler;
 
     std::atomic<bool> reschedule_pending = false;
-    size_t core_index;
+    std::size_t core_index;
 };
 
 } // namespace Core
diff --git a/src/core/crypto/aes_util.cpp b/src/core/crypto/aes_util.cpp
index 89ade5000..4be76bb43 100644
--- a/src/core/crypto/aes_util.cpp
+++ b/src/core/crypto/aes_util.cpp
@@ -10,9 +10,9 @@
 
 namespace Core::Crypto {
 namespace {
-std::vector<u8> CalculateNintendoTweak(size_t sector_id) {
+std::vector<u8> CalculateNintendoTweak(std::size_t sector_id) {
     std::vector<u8> out(0x10);
-    for (size_t i = 0xF; i <= 0xF; --i) {
+    for (std::size_t i = 0xF; i <= 0xF; --i) {
         out[i] = sector_id & 0xFF;
         sector_id >>= 8;
     }
@@ -20,11 +20,14 @@ std::vector<u8> CalculateNintendoTweak(size_t sector_id) {
 }
 } // Anonymous namespace
 
-static_assert(static_cast<size_t>(Mode::CTR) == static_cast<size_t>(MBEDTLS_CIPHER_AES_128_CTR),
+static_assert(static_cast<std::size_t>(Mode::CTR) ==
+                  static_cast<std::size_t>(MBEDTLS_CIPHER_AES_128_CTR),
               "CTR has incorrect value.");
-static_assert(static_cast<size_t>(Mode::ECB) == static_cast<size_t>(MBEDTLS_CIPHER_AES_128_ECB),
+static_assert(static_cast<std::size_t>(Mode::ECB) ==
+                  static_cast<std::size_t>(MBEDTLS_CIPHER_AES_128_ECB),
               "ECB has incorrect value.");
-static_assert(static_cast<size_t>(Mode::XTS) == static_cast<size_t>(MBEDTLS_CIPHER_AES_128_XTS),
+static_assert(static_cast<std::size_t>(Mode::XTS) ==
+                  static_cast<std::size_t>(MBEDTLS_CIPHER_AES_128_XTS),
               "XTS has incorrect value.");
 
 // Structure to hide mbedtls types from header file
@@ -33,7 +36,7 @@ struct CipherContext {
     mbedtls_cipher_context_t decryption_context;
 };
 
-template <typename Key, size_t KeySize>
+template <typename Key, std::size_t KeySize>
 Crypto::AESCipher<Key, KeySize>::AESCipher(Key key, Mode mode)
     : ctx(std::make_unique<CipherContext>()) {
     mbedtls_cipher_init(&ctx->encryption_context);
@@ -54,26 +57,26 @@ Crypto::AESCipher<Key, KeySize>::AESCipher(Key key, Mode mode)
     //"Failed to set key on mbedtls ciphers.");
 }
 
-template <typename Key, size_t KeySize>
+template <typename Key, std::size_t KeySize>
 AESCipher<Key, KeySize>::~AESCipher() {
     mbedtls_cipher_free(&ctx->encryption_context);
     mbedtls_cipher_free(&ctx->decryption_context);
 }
 
-template <typename Key, size_t KeySize>
+template <typename Key, std::size_t KeySize>
 void AESCipher<Key, KeySize>::SetIV(std::vector<u8> iv) {
     ASSERT_MSG((mbedtls_cipher_set_iv(&ctx->encryption_context, iv.data(), iv.size()) ||
                 mbedtls_cipher_set_iv(&ctx->decryption_context, iv.data(), iv.size())) == 0,
                "Failed to set IV on mbedtls ciphers.");
 }
 
-template <typename Key, size_t KeySize>
-void AESCipher<Key, KeySize>::Transcode(const u8* src, size_t size, u8* dest, Op op) const {
+template <typename Key, std::size_t KeySize>
+void AESCipher<Key, KeySize>::Transcode(const u8* src, std::size_t size, u8* dest, Op op) const {
     auto* const context = op == Op::Encrypt ? &ctx->encryption_context : &ctx->decryption_context;
 
     mbedtls_cipher_reset(context);
 
-    size_t written = 0;
+    std::size_t written = 0;
     if (mbedtls_cipher_get_cipher_mode(context) == MBEDTLS_MODE_XTS) {
         mbedtls_cipher_update(context, src, size, dest, &written);
         if (written != size) {
@@ -90,8 +93,8 @@ void AESCipher<Key, KeySize>::Transcode(const u8* src, size_t size, u8* dest, Op
             return;
         }
 
-        for (size_t offset = 0; offset < size; offset += block_size) {
-            auto length = std::min<size_t>(block_size, size - offset);
+        for (std::size_t offset = 0; offset < size; offset += block_size) {
+            auto length = std::min<std::size_t>(block_size, size - offset);
             mbedtls_cipher_update(context, src + offset, length, dest + offset, &written);
             if (written != length) {
                 if (length < block_size) {
@@ -110,12 +113,12 @@ void AESCipher<Key, KeySize>::Transcode(const u8* src, size_t size, u8* dest, Op
     mbedtls_cipher_finish(context, nullptr, nullptr);
 }
 
-template <typename Key, size_t KeySize>
-void AESCipher<Key, KeySize>::XTSTranscode(const u8* src, size_t size, u8* dest, size_t sector_id,
-                                           size_t sector_size, Op op) {
+template <typename Key, std::size_t KeySize>
+void AESCipher<Key, KeySize>::XTSTranscode(const u8* src, std::size_t size, u8* dest,
+                                           std::size_t sector_id, std::size_t sector_size, Op op) {
     ASSERT_MSG(size % sector_size == 0, "XTS decryption size must be a multiple of sector size.");
 
-    for (size_t i = 0; i < size; i += sector_size) {
+    for (std::size_t i = 0; i < size; i += sector_size) {
         SetIV(CalculateNintendoTweak(sector_id++));
         Transcode<u8, u8>(src + i, sector_size, dest + i, op);
     }
diff --git a/src/core/crypto/aes_util.h b/src/core/crypto/aes_util.h
index 8ce9d6612..edc4ab910 100644
--- a/src/core/crypto/aes_util.h
+++ b/src/core/crypto/aes_util.h
@@ -25,7 +25,7 @@ enum class Op {
     Decrypt,
 };
 
-template <typename Key, size_t KeySize = sizeof(Key)>
+template <typename Key, std::size_t KeySize = sizeof(Key)>
 class AESCipher {
     static_assert(std::is_same_v<Key, std::array<u8, KeySize>>, "Key must be std::array of u8.");
     static_assert(KeySize == 0x10 || KeySize == 0x20, "KeySize must be 128 or 256.");
@@ -38,25 +38,25 @@ public:
     void SetIV(std::vector<u8> iv);
 
     template <typename Source, typename Dest>
-    void Transcode(const Source* src, size_t size, Dest* dest, Op op) const {
+    void Transcode(const Source* src, std::size_t size, Dest* dest, Op op) const {
         static_assert(std::is_trivially_copyable_v<Source> && std::is_trivially_copyable_v<Dest>,
                       "Transcode source and destination types must be trivially copyable.");
         Transcode(reinterpret_cast<const u8*>(src), size, reinterpret_cast<u8*>(dest), op);
     }
 
-    void Transcode(const u8* src, size_t size, u8* dest, Op op) const;
+    void Transcode(const u8* src, std::size_t size, u8* dest, Op op) const;
 
     template <typename Source, typename Dest>
-    void XTSTranscode(const Source* src, size_t size, Dest* dest, size_t sector_id,
-                      size_t sector_size, Op op) {
+    void XTSTranscode(const Source* src, std::size_t size, Dest* dest, std::size_t sector_id,
+                      std::size_t sector_size, Op op) {
         static_assert(std::is_trivially_copyable_v<Source> && std::is_trivially_copyable_v<Dest>,
                       "XTSTranscode source and destination types must be trivially copyable.");
         XTSTranscode(reinterpret_cast<const u8*>(src), size, reinterpret_cast<u8*>(dest), sector_id,
                      sector_size, op);
     }
 
-    void XTSTranscode(const u8* src, size_t size, u8* dest, size_t sector_id, size_t sector_size,
-                      Op op);
+    void XTSTranscode(const u8* src, std::size_t size, u8* dest, std::size_t sector_id,
+                      std::size_t sector_size, Op op);
 
 private:
     std::unique_ptr<CipherContext> ctx;
diff --git a/src/core/crypto/ctr_encryption_layer.cpp b/src/core/crypto/ctr_encryption_layer.cpp
index 296fad419..902841c77 100644
--- a/src/core/crypto/ctr_encryption_layer.cpp
+++ b/src/core/crypto/ctr_encryption_layer.cpp
@@ -8,11 +8,12 @@
 
 namespace Core::Crypto {
 
-CTREncryptionLayer::CTREncryptionLayer(FileSys::VirtualFile base_, Key128 key_, size_t base_offset)
+CTREncryptionLayer::CTREncryptionLayer(FileSys::VirtualFile base_, Key128 key_,
+                                       std::size_t base_offset)
     : EncryptionLayer(std::move(base_)), base_offset(base_offset), cipher(key_, Mode::CTR),
       iv(16, 0) {}
 
-size_t CTREncryptionLayer::Read(u8* data, size_t length, size_t offset) const {
+std::size_t CTREncryptionLayer::Read(u8* data, std::size_t length, std::size_t offset) const {
     if (length == 0)
         return 0;
 
@@ -28,7 +29,7 @@ size_t CTREncryptionLayer::Read(u8* data, size_t length, size_t offset) const {
     std::vector<u8> block = base->ReadBytes(0x10, offset - sector_offset);
     UpdateIV(base_offset + offset - sector_offset);
     cipher.Transcode(block.data(), block.size(), block.data(), Op::Decrypt);
-    size_t read = 0x10 - sector_offset;
+    std::size_t read = 0x10 - sector_offset;
 
     if (length + sector_offset < 0x10) {
         std::memcpy(data, block.data() + sector_offset, std::min<u64>(length, read));
@@ -43,9 +44,9 @@ void CTREncryptionLayer::SetIV(const std::vector<u8>& iv_) {
     iv.assign(iv_.cbegin(), iv_.cbegin() + length);
 }
 
-void CTREncryptionLayer::UpdateIV(size_t offset) const {
+void CTREncryptionLayer::UpdateIV(std::size_t offset) const {
     offset >>= 4;
-    for (size_t i = 0; i < 8; ++i) {
+    for (std::size_t i = 0; i < 8; ++i) {
         iv[16 - i - 1] = offset & 0xFF;
         offset >>= 8;
     }
diff --git a/src/core/crypto/ctr_encryption_layer.h b/src/core/crypto/ctr_encryption_layer.h
index 11b8683c7..a7bf810f4 100644
--- a/src/core/crypto/ctr_encryption_layer.h
+++ b/src/core/crypto/ctr_encryption_layer.h
@@ -14,20 +14,20 @@ namespace Core::Crypto {
 // Sits on top of a VirtualFile and provides CTR-mode AES decription.
 class CTREncryptionLayer : public EncryptionLayer {
 public:
-    CTREncryptionLayer(FileSys::VirtualFile base, Key128 key, size_t base_offset);
+    CTREncryptionLayer(FileSys::VirtualFile base, Key128 key, std::size_t base_offset);
 
-    size_t Read(u8* data, size_t length, size_t offset) const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
 
     void SetIV(const std::vector<u8>& iv);
 
 private:
-    size_t base_offset;
+    std::size_t base_offset;
 
     // Must be mutable as operations modify cipher contexts.
     mutable AESCipher<Key128> cipher;
     mutable std::vector<u8> iv;
 
-    void UpdateIV(size_t offset) const;
+    void UpdateIV(std::size_t offset) const;
 };
 
 } // namespace Core::Crypto
diff --git a/src/core/crypto/encryption_layer.cpp b/src/core/crypto/encryption_layer.cpp
index 4204527e3..4c377d7d4 100644
--- a/src/core/crypto/encryption_layer.cpp
+++ b/src/core/crypto/encryption_layer.cpp
@@ -12,11 +12,11 @@ std::string EncryptionLayer::GetName() const {
     return base->GetName();
 }
 
-size_t EncryptionLayer::GetSize() const {
+std::size_t EncryptionLayer::GetSize() const {
     return base->GetSize();
 }
 
-bool EncryptionLayer::Resize(size_t new_size) {
+bool EncryptionLayer::Resize(std::size_t new_size) {
     return false;
 }
 
@@ -32,7 +32,7 @@ bool EncryptionLayer::IsReadable() const {
     return true;
 }
 
-size_t EncryptionLayer::Write(const u8* data, size_t length, size_t offset) {
+std::size_t EncryptionLayer::Write(const u8* data, std::size_t length, std::size_t offset) {
     return 0;
 }
 
diff --git a/src/core/crypto/encryption_layer.h b/src/core/crypto/encryption_layer.h
index 7f05af9b4..53619cb38 100644
--- a/src/core/crypto/encryption_layer.h
+++ b/src/core/crypto/encryption_layer.h
@@ -15,15 +15,15 @@ class EncryptionLayer : public FileSys::VfsFile {
 public:
     explicit EncryptionLayer(FileSys::VirtualFile base);
 
-    size_t Read(u8* data, size_t length, size_t offset) const override = 0;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override = 0;
 
     std::string GetName() const override;
-    size_t GetSize() const override;
-    bool Resize(size_t new_size) override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
     std::shared_ptr<FileSys::VfsDirectory> GetContainingDirectory() const override;
     bool IsWritable() const override;
     bool IsReadable() const override;
-    size_t Write(const u8* data, size_t length, size_t offset) override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
     bool Rename(std::string_view name) override;
 
 protected:
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index 6f27f990b..bf3a70944 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -54,7 +54,7 @@ boost::optional<Key128> DeriveSDSeed() {
         return boost::none;
 
     std::array<u8, 0x10> buffer{};
-    size_t offset = 0;
+    std::size_t offset = 0;
     for (; offset + 0x10 < save_43.GetSize(); ++offset) {
         save_43.Seek(offset, SEEK_SET);
         save_43.ReadBytes(buffer.data(), buffer.size());
@@ -105,7 +105,7 @@ Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, const KeyManag
 
     // Combine sources and seed
     for (auto& source : sd_key_sources) {
-        for (size_t i = 0; i < source.size(); ++i)
+        for (std::size_t i = 0; i < source.size(); ++i)
             source[i] ^= sd_seed[i & 0xF];
     }
 
@@ -207,7 +207,7 @@ Key256 KeyManager::GetKey(S256KeyType id, u64 field1, u64 field2) const {
     return s256_keys.at({id, field1, field2});
 }
 
-template <size_t Size>
+template <std::size_t Size>
 void KeyManager::WriteKeyToFile(bool title_key, std::string_view keyname,
                                 const std::array<u8, Size>& key) {
     const std::string yuzu_keys_dir = FileUtil::GetUserPath(FileUtil::UserPath::KeysDir);
diff --git a/src/core/crypto/key_manager.h b/src/core/crypto/key_manager.h
index ce67913bb..978eec8dc 100644
--- a/src/core/crypto/key_manager.h
+++ b/src/core/crypto/key_manager.h
@@ -108,7 +108,7 @@ private:
     void LoadFromFile(const std::string& filename, bool is_title_keys);
     void AttemptLoadKeyFile(const std::string& dir1, const std::string& dir2,
                             const std::string& filename, bool title);
-    template <size_t Size>
+    template <std::size_t Size>
     void WriteKeyToFile(bool title_key, std::string_view keyname, const std::array<u8, Size>& key);
 
     static const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> s128_file_id;
diff --git a/src/core/crypto/xts_encryption_layer.cpp b/src/core/crypto/xts_encryption_layer.cpp
index c10832cfe..8f0ba4ee7 100644
--- a/src/core/crypto/xts_encryption_layer.cpp
+++ b/src/core/crypto/xts_encryption_layer.cpp
@@ -14,7 +14,7 @@ constexpr u64 XTS_SECTOR_SIZE = 0x4000;
 XTSEncryptionLayer::XTSEncryptionLayer(FileSys::VirtualFile base_, Key256 key_)
     : EncryptionLayer(std::move(base_)), cipher(key_, Mode::XTS) {}
 
-size_t XTSEncryptionLayer::Read(u8* data, size_t length, size_t offset) const {
+std::size_t XTSEncryptionLayer::Read(u8* data, std::size_t length, std::size_t offset) const {
     if (length == 0)
         return 0;
 
@@ -46,7 +46,7 @@ size_t XTSEncryptionLayer::Read(u8* data, size_t length, size_t offset) const {
         block.resize(XTS_SECTOR_SIZE);
     cipher.XTSTranscode(block.data(), block.size(), block.data(),
                         (offset - sector_offset) / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt);
-    const size_t read = XTS_SECTOR_SIZE - sector_offset;
+    const std::size_t read = XTS_SECTOR_SIZE - sector_offset;
 
     if (length + sector_offset < XTS_SECTOR_SIZE) {
         std::memcpy(data, block.data() + sector_offset, std::min<u64>(length, read));
diff --git a/src/core/crypto/xts_encryption_layer.h b/src/core/crypto/xts_encryption_layer.h
index 7a1f1dc64..5f8f00fe7 100644
--- a/src/core/crypto/xts_encryption_layer.h
+++ b/src/core/crypto/xts_encryption_layer.h
@@ -15,7 +15,7 @@ class XTSEncryptionLayer : public EncryptionLayer {
 public:
     XTSEncryptionLayer(FileSys::VirtualFile base, Key256 key);
 
-    size_t Read(u8* data, size_t length, size_t offset) const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
 
 private:
     // Must be mutable as operations modify cipher contexts.
diff --git a/src/core/file_sys/bis_factory.cpp b/src/core/file_sys/bis_factory.cpp
index 205492897..6102ef476 100644
--- a/src/core/file_sys/bis_factory.cpp
+++ b/src/core/file_sys/bis_factory.cpp
@@ -2,13 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <fmt/format.h>
 #include "core/file_sys/bis_factory.h"
 #include "core/file_sys/registered_cache.h"
 
 namespace FileSys {
 
-BISFactory::BISFactory(VirtualDir nand_root_)
-    : nand_root(std::move(nand_root_)),
+BISFactory::BISFactory(VirtualDir nand_root_, VirtualDir load_root_)
+    : nand_root(std::move(nand_root_)), load_root(std::move(load_root_)),
       sysnand_cache(std::make_shared<RegisteredCache>(
           GetOrCreateDirectoryRelative(nand_root, "/system/Contents/registered"))),
       usrnand_cache(std::make_shared<RegisteredCache>(
@@ -24,4 +25,11 @@ std::shared_ptr<RegisteredCache> BISFactory::GetUserNANDContents() const {
     return usrnand_cache;
 }
 
+VirtualDir BISFactory::GetModificationLoadRoot(u64 title_id) const {
+    // LayeredFS doesn't work on updates (bit 0x800 set) or title id-less homebrew (id 0)
+    if (title_id != 0 && (title_id & 0x800) == 0)
+        return GetOrCreateDirectoryRelative(load_root, fmt::format("/{:016X}", title_id));
+    return nullptr;
+}
+
 } // namespace FileSys
diff --git a/src/core/file_sys/bis_factory.h b/src/core/file_sys/bis_factory.h
index 9523dd864..c352e0925 100644
--- a/src/core/file_sys/bis_factory.h
+++ b/src/core/file_sys/bis_factory.h
@@ -17,14 +17,17 @@ class RegisteredCache;
 /// registered caches.
 class BISFactory {
 public:
-    explicit BISFactory(VirtualDir nand_root);
+    explicit BISFactory(VirtualDir nand_root, VirtualDir load_root);
     ~BISFactory();
 
     std::shared_ptr<RegisteredCache> GetSystemNANDContents() const;
     std::shared_ptr<RegisteredCache> GetUserNANDContents() const;
 
+    VirtualDir GetModificationLoadRoot(u64 title_id) const;
+
 private:
     VirtualDir nand_root;
+    VirtualDir load_root;
 
     std::shared_ptr<RegisteredCache> sysnand_cache;
     std::shared_ptr<RegisteredCache> usrnand_cache;
diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp
index 8218893b2..edfc1bbd4 100644
--- a/src/core/file_sys/card_image.cpp
+++ b/src/core/file_sys/card_image.cpp
@@ -41,13 +41,14 @@ XCI::XCI(VirtualFile file_) : file(std::move(file_)), partitions(0x4) {
 
     for (XCIPartition partition :
          {XCIPartition::Update, XCIPartition::Normal, XCIPartition::Secure, XCIPartition::Logo}) {
-        auto raw = main_hfs.GetFile(partition_names[static_cast<size_t>(partition)]);
+        auto raw = main_hfs.GetFile(partition_names[static_cast<std::size_t>(partition)]);
         if (raw != nullptr)
-            partitions[static_cast<size_t>(partition)] = std::make_shared<PartitionFilesystem>(raw);
+            partitions[static_cast<std::size_t>(partition)] =
+                std::make_shared<PartitionFilesystem>(raw);
     }
 
     secure_partition = std::make_shared<NSP>(
-        main_hfs.GetFile(partition_names[static_cast<size_t>(XCIPartition::Secure)]));
+        main_hfs.GetFile(partition_names[static_cast<std::size_t>(XCIPartition::Secure)]));
 
     const auto secure_ncas = secure_partition->GetNCAsCollapsed();
     std::copy(secure_ncas.begin(), secure_ncas.end(), std::back_inserter(ncas));
@@ -92,7 +93,7 @@ Loader::ResultStatus XCI::GetProgramNCAStatus() const {
 }
 
 VirtualDir XCI::GetPartition(XCIPartition partition) const {
-    return partitions[static_cast<size_t>(partition)];
+    return partitions[static_cast<std::size_t>(partition)];
 }
 
 std::shared_ptr<NSP> XCI::GetSecurePartitionNSP() const {
@@ -168,11 +169,11 @@ bool XCI::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
 }
 
 Loader::ResultStatus XCI::AddNCAFromPartition(XCIPartition part) {
-    if (partitions[static_cast<size_t>(part)] == nullptr) {
+    if (partitions[static_cast<std::size_t>(part)] == nullptr) {
         return Loader::ResultStatus::ErrorXCIMissingPartition;
     }
 
-    for (const VirtualFile& file : partitions[static_cast<size_t>(part)]->GetFiles()) {
+    for (const VirtualFile& file : partitions[static_cast<std::size_t>(part)]->GetFiles()) {
         if (file->GetExtension() != "nca")
             continue;
         auto nca = std::make_shared<NCA>(file);
@@ -187,7 +188,7 @@ Loader::ResultStatus XCI::AddNCAFromPartition(XCIPartition part) {
         } else {
             const u16 error_id = static_cast<u16>(nca->GetStatus());
             LOG_CRITICAL(Loader, "Could not load NCA {}/{}, failed with error code {:04X} ({})",
-                         partition_names[static_cast<size_t>(part)], nca->GetName(), error_id,
+                         partition_names[static_cast<std::size_t>(part)], nca->GetName(), error_id,
                          nca->GetStatus());
         }
     }
diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp
index 79bfb6fec..aa1b3c17d 100644
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -298,11 +298,11 @@ NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_off
         auto section = sections[i];
 
         if (section.raw.header.filesystem_type == NCASectionFilesystemType::ROMFS) {
-            const size_t base_offset =
+            const std::size_t base_offset =
                 header.section_tables[i].media_offset * MEDIA_OFFSET_MULTIPLIER;
             ivfc_offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
-            const size_t romfs_offset = base_offset + ivfc_offset;
-            const size_t romfs_size = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].size;
+            const std::size_t romfs_offset = base_offset + ivfc_offset;
+            const std::size_t romfs_size = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].size;
             auto raw = std::make_shared<OffsetVfsFile>(file, romfs_size, romfs_offset);
             auto dec = Decrypt(section, raw, romfs_offset);
 
@@ -463,6 +463,8 @@ NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_off
     status = Loader::ResultStatus::Success;
 }
 
+NCA::~NCA() = default;
+
 Loader::ResultStatus NCA::GetStatus() const {
     return status;
 }
diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h
index 00eca52da..f9f66cae9 100644
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -81,6 +81,8 @@ class NCA : public ReadOnlyVfsDirectory {
 public:
     explicit NCA(VirtualFile file, VirtualFile bktr_base_romfs = nullptr,
                  u64 bktr_base_ivfc_offset = 0);
+    ~NCA() override;
+
     Loader::ResultStatus GetStatus() const;
 
     std::vector<std::shared_ptr<VfsFile>> GetFiles() const override;
diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp
index e76bf77bf..5b1177a03 100644
--- a/src/core/file_sys/control_metadata.cpp
+++ b/src/core/file_sys/control_metadata.cpp
@@ -8,6 +8,14 @@
 
 namespace FileSys {
 
+const std::array<const char*, 15> LANGUAGE_NAMES = {
+    "AmericanEnglish", "BritishEnglish", "Japanese",
+    "French",          "German",         "LatinAmericanSpanish",
+    "Spanish",         "Italian",        "Dutch",
+    "CanadianFrench",  "Portugese",      "Russian",
+    "Korean",          "Taiwanese",      "Chinese",
+};
+
 std::string LanguageEntry::GetApplicationName() const {
     return Common::StringFromFixedZeroTerminatedBuffer(application_name.data(), 0x200);
 }
@@ -20,18 +28,20 @@ NACP::NACP(VirtualFile file) : raw(std::make_unique<RawNACP>()) {
     file->ReadObject(raw.get());
 }
 
+NACP::~NACP() = default;
+
 const LanguageEntry& NACP::GetLanguageEntry(Language language) const {
     if (language != Language::Default) {
         return raw->language_entries.at(static_cast<u8>(language));
-    } else {
-        for (const auto& language_entry : raw->language_entries) {
-            if (!language_entry.GetApplicationName().empty())
-                return language_entry;
-        }
-
-        // Fallback to English
-        return GetLanguageEntry(Language::AmericanEnglish);
     }
+
+    for (const auto& language_entry : raw->language_entries) {
+        if (!language_entry.GetApplicationName().empty())
+            return language_entry;
+    }
+
+    // Fallback to English
+    return GetLanguageEntry(Language::AmericanEnglish);
 }
 
 std::string NACP::GetApplicationName(Language language) const {
diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h
index 8a510bf46..43d6f0719 100644
--- a/src/core/file_sys/control_metadata.h
+++ b/src/core/file_sys/control_metadata.h
@@ -66,18 +66,15 @@ enum class Language : u8 {
     Default = 255,
 };
 
-static constexpr std::array<const char*, 15> LANGUAGE_NAMES = {
-    "AmericanEnglish", "BritishEnglish", "Japanese",
-    "French",          "German",         "LatinAmericanSpanish",
-    "Spanish",         "Italian",        "Dutch",
-    "CanadianFrench",  "Portugese",      "Russian",
-    "Korean",          "Taiwanese",      "Chinese"};
+extern const std::array<const char*, 15> LANGUAGE_NAMES;
 
 // A class representing the format used by NX metadata files, typically named Control.nacp.
 // These store application name, dev name, title id, and other miscellaneous data.
 class NACP {
 public:
     explicit NACP(VirtualFile file);
+    ~NACP();
+
     const LanguageEntry& GetLanguageEntry(Language language = Language::Default) const;
     std::string GetApplicationName(Language language = Language::Default) const;
     std::string GetDeveloperName(Language language = Language::Default) const;
diff --git a/src/core/file_sys/directory.h b/src/core/file_sys/directory.h
index 3759e743a..12bb90ec8 100644
--- a/src/core/file_sys/directory.h
+++ b/src/core/file_sys/directory.h
@@ -25,7 +25,7 @@ enum EntryType : u8 {
 struct Entry {
     Entry(std::string_view view, EntryType entry_type, u64 entry_size)
         : type{entry_type}, file_size{entry_size} {
-        const size_t copy_size = view.copy(filename, std::size(filename) - 1);
+        const std::size_t copy_size = view.copy(filename, std::size(filename) - 1);
         filename[copy_size] = '\0';
     }
 
diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp
new file mode 100644
index 000000000..2a913ce82
--- /dev/null
+++ b/src/core/file_sys/fsmitm_romfsbuild.cpp
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2018 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Adapted by DarkLordZach for use/interaction with yuzu
+ *
+ * Modifications Copyright 2018 yuzu emulator team
+ * Licensed under GPLv2 or any later version
+ * Refer to the license.txt file included.
+ */
+
+#include <cstring>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "core/file_sys/fsmitm_romfsbuild.h"
+#include "core/file_sys/vfs.h"
+#include "core/file_sys/vfs_vector.h"
+
+namespace FileSys {
+
+constexpr u64 FS_MAX_PATH = 0x301;
+
+constexpr u32 ROMFS_ENTRY_EMPTY = 0xFFFFFFFF;
+constexpr u32 ROMFS_FILEPARTITION_OFS = 0x200;
+
+// Types for building a RomFS.
+struct RomFSHeader {
+    u64 header_size;          // Build() stores sizeof(RomFSHeader)
+    u64 dir_hash_table_ofs;   // Image offset of the directory hash table
+    u64 dir_hash_table_size;  // In bytes
+    u64 dir_table_ofs;        // Image offset of the directory table
+    u64 dir_table_size;       // In bytes
+    u64 file_hash_table_ofs;  // Image offset of the file hash table
+    u64 file_hash_table_size; // In bytes
+    u64 file_table_ofs;       // Image offset of the file table
+    u64 file_table_size;      // In bytes
+    u64 file_partition_ofs;   // Build() stores ROMFS_FILEPARTITION_OFS
+};
+static_assert(sizeof(RomFSHeader) == 0x50, "RomFSHeader has incorrect size.");
+
+struct RomFSDirectoryEntry {
+    u32 parent;    // Directory table offset of the parent entry (0 for the root)
+    u32 sibling;   // Directory table offset of the next sibling, or ROMFS_ENTRY_EMPTY
+    u32 child;     // Directory table offset of the first subdirectory, or ROMFS_ENTRY_EMPTY
+    u32 file;      // File table offset of the first file, or ROMFS_ENTRY_EMPTY
+    u32 hash;      // Previous head of this entry's hash bucket, or ROMFS_ENTRY_EMPTY
+    u32 name_size; // Length of the name bytes stored right after this entry
+};
+static_assert(sizeof(RomFSDirectoryEntry) == 0x18, "RomFSDirectoryEntry has incorrect size.");
+
+struct RomFSFileEntry {
+    u32 parent;    // Directory table offset of the containing directory
+    u32 sibling;   // File table offset of the next file in that directory, or ROMFS_ENTRY_EMPTY
+    u64 offset;    // Offset of the contents within the file partition
+    u64 size;      // Size of the contents in bytes
+    u32 hash;      // Previous head of this entry's hash bucket, or ROMFS_ENTRY_EMPTY
+    u32 name_size; // Length of the name bytes stored right after this entry
+};
+static_assert(sizeof(RomFSFileEntry) == 0x20, "RomFSFileEntry has incorrect size.");
+
+struct RomFSBuildFileContext;
+
+struct RomFSBuildDirectoryContext {
+    std::string path;     // Full path: the parent's path + "/" + this directory's name
+    u32 cur_path_ofs = 0; // Index in path where this directory's own name starts
+    u32 path_len = 0;     // Length of path (0 for the root)
+    u32 entry_offset = 0; // Offset of this entry in the directory table, assigned by Build()
+    std::shared_ptr<RomFSBuildDirectoryContext> parent;
+    std::shared_ptr<RomFSBuildDirectoryContext> child;   // First subdirectory, linked by Build()
+    std::shared_ptr<RomFSBuildDirectoryContext> sibling; // Next sibling, linked by Build()
+    std::shared_ptr<RomFSBuildFileContext> file;         // First file, linked by Build()
+};
+
+struct RomFSBuildFileContext {
+    std::string path;     // Full path: the parent's path + "/" + this file's name
+    u32 cur_path_ofs = 0; // Index in path where this file's own name starts
+    u32 path_len = 0;     // Length of path
+    u32 entry_offset = 0; // Offset of this entry in the file table, assigned by Build()
+    u64 offset = 0;       // Offset of the contents in the file partition, assigned by Build()
+    u64 size = 0;         // Size reported by source
+    std::shared_ptr<RomFSBuildDirectoryContext> parent;
+    std::shared_ptr<RomFSBuildFileContext> sibling; // Next file in the same directory
+    VirtualFile source;                             // File that supplies the contents
+};
+
+static u32 romfs_calc_path_hash(u32 parent, std::string path, u32 start, std::size_t path_len) {
+    u32 hash = parent ^ 123456789;
+    for (u32 i = 0; i < path_len; i++) {
+        hash = (hash >> 5) | (hash << 27); // Rotate right by 5 bits
+        // Use the raw byte value; a signed char would sign-extend for bytes >= 0x80.
+        hash ^= static_cast<u8>(path[start + i]);
+    }
+    return hash;
+}
+
+// Returns the number of buckets to use for a RomFS hash table holding num_entries entries.
+static u64 romfs_get_hash_table_count(u64 num_entries) {
+    if (num_entries < 3) {
+        return 3;
+    }
+    if (num_entries < 19) {
+        return num_entries | 1; // Small tables: entry count forced odd.
+    }
+    // Larger tables: advance to the first count with no prime factor of 17 or less.
+    u64 buckets = num_entries;
+    while (buckets % 2 == 0 || buckets % 3 == 0 || buckets % 5 == 0 || buckets % 7 == 0 ||
+           buckets % 11 == 0 || buckets % 13 == 0 || buckets % 17 == 0) {
+        ++buckets;
+    }
+    return buckets;
+}
+
+// Registers every entry of the directory that parent describes (looked up under root_romfs) with
+// this context, then recurses into each subdirectory that was newly added.
+void RomFSBuildContext::VisitDirectory(VirtualDir root_romfs,
+                                       std::shared_ptr<RomFSBuildDirectoryContext> parent) {
+    std::vector<std::shared_ptr<RomFSBuildDirectoryContext>> child_dirs;
+    VirtualDir dir;
+    if (parent->path_len == 0) // Only the root context has a zero path_len
+        dir = root_romfs;
+    else
+        dir = root_romfs->GetDirectoryRelative(parent->path);
+    // NOTE(review): dir is dereferenced unchecked; confirm this lookup cannot yield nullptr.
+    const auto entries = dir->GetEntries();
+
+    for (const auto& kv : entries) {
+        if (kv.second == VfsEntryType::Directory) {
+            const auto child = std::make_shared<RomFSBuildDirectoryContext>();
+            // Set child's path.
+            child->cur_path_ofs = parent->path_len + 1;
+            child->path_len = child->cur_path_ofs + static_cast<u32>(kv.first.size());
+            child->path = parent->path + "/" + kv.first;
+
+            // Sanity check on path_len
+            ASSERT(child->path_len < FS_MAX_PATH);
+
+            if (AddDirectory(parent, child)) {
+                child_dirs.push_back(child);
+            }
+        } else {
+            const auto child = std::make_shared<RomFSBuildFileContext>();
+            // Set child's path.
+            child->cur_path_ofs = parent->path_len + 1;
+            child->path_len = child->cur_path_ofs + static_cast<u32>(kv.first.size());
+            child->path = parent->path + "/" + kv.first;
+
+            // Sanity check on path_len
+            ASSERT(child->path_len < FS_MAX_PATH);
+
+            child->source = root_romfs->GetFileRelative(child->path);
+
+            child->size = child->source->GetSize();
+
+            AddFile(parent, child);
+        }
+    }
+    // Recurse only after every entry at this level has been registered.
+    for (auto& child : child_dirs) {
+        this->VisitDirectory(root_romfs, child);
+    }
+}
+
+// Records dir_ctx as a child of parent_dir_ctx; returns false if its path is already registered.
+bool RomFSBuildContext::AddDirectory(std::shared_ptr<RomFSBuildDirectoryContext> parent_dir_ctx,
+                                     std::shared_ptr<RomFSBuildDirectoryContext> dir_ctx) {
+    // emplace leaves the map untouched when the path is already a known directory.
+    const bool inserted = directories.emplace(dir_ctx->path, dir_ctx).second;
+    if (!inserted) {
+        return false;
+    }
+
+    // Grow the table by one fixed-size entry plus the directory's own name, aligned up to 4.
+    const u32 name_size = dir_ctx->path_len - dir_ctx->cur_path_ofs;
+    ++num_dirs;
+    dir_table_size += sizeof(RomFSDirectoryEntry) + Common::AlignUp(name_size, 4);
+    dir_ctx->parent = parent_dir_ctx;
+    return true;
+}
+
+// Records file_ctx as a file of parent_dir_ctx; returns false if its path is already registered.
+bool RomFSBuildContext::AddFile(std::shared_ptr<RomFSBuildDirectoryContext> parent_dir_ctx,
+                                std::shared_ptr<RomFSBuildFileContext> file_ctx) {
+    // emplace leaves the map untouched when the path is already a known file.
+    const bool inserted = files.emplace(file_ctx->path, file_ctx).second;
+    if (!inserted) {
+        return false;
+    }
+
+    // Grow the table by one fixed-size entry plus the file's own name, aligned up to 4.
+    const u32 name_size = file_ctx->path_len - file_ctx->cur_path_ofs;
+    ++num_files;
+    file_table_size += sizeof(RomFSFileEntry) + Common::AlignUp(name_size, 4);
+    file_ctx->parent = parent_dir_ctx;
+
+    return true;
+}
+
+// Creates the root directory context and registers everything found under base_.
+RomFSBuildContext::RomFSBuildContext(VirtualDir base_) : base(std::move(base_)) {
+    root = std::make_shared<RomFSBuildDirectoryContext>();
+    root->path = "\0"; // The literal ends at its first NUL, so this assigns an empty path.
+    directories.emplace(root->path, root);
+    num_dirs = 1;
+    dir_table_size = 0x18; // sizeof(RomFSDirectoryEntry): the root entry has no name bytes.
+    VisitDirectory(base, root);
+}
+
+RomFSBuildContext::~RomFSBuildContext() = default;
+
+// Lays out everything registered in this context as a RomFS image and returns its pieces keyed by
+// their byte offset in the image: the header at 0, file contents, then the metadata tables.
+std::map<u64, VirtualFile> RomFSBuildContext::Build() {
+    const u64 dir_hash_table_entry_count = romfs_get_hash_table_count(num_dirs);
+    const u64 file_hash_table_entry_count = romfs_get_hash_table_count(num_files);
+    dir_hash_table_size = 4 * dir_hash_table_entry_count;
+    file_hash_table_size = 4 * file_hash_table_entry_count;
+
+    // Assign metadata pointers
+    RomFSHeader header{};
+
+    std::vector<u32> dir_hash_table(dir_hash_table_entry_count, ROMFS_ENTRY_EMPTY);
+    std::vector<u32> file_hash_table(file_hash_table_entry_count, ROMFS_ENTRY_EMPTY);
+
+    std::vector<u8> dir_table(dir_table_size);
+    std::vector<u8> file_table(file_table_size);
+
+    std::shared_ptr<RomFSBuildFileContext> cur_file;
+    // Determine file offsets.
+    u32 entry_offset = 0;
+    for (const auto& it : files) {
+        cur_file = it.second;
+        file_partition_size = Common::AlignUp(file_partition_size, 16);
+        cur_file->offset = file_partition_size;
+        file_partition_size += cur_file->size;
+        cur_file->entry_offset = entry_offset;
+        entry_offset += sizeof(RomFSFileEntry) +
+                        Common::AlignUp(cur_file->path_len - cur_file->cur_path_ofs, 4);
+    }
+    // Assign deferred parent/sibling ownership.
+    for (auto it = files.rbegin(); it != files.rend(); ++it) {
+        cur_file = it->second;
+        cur_file->sibling = cur_file->parent->file;
+        cur_file->parent->file = cur_file;
+    }
+
+    std::shared_ptr<RomFSBuildDirectoryContext> cur_dir;
+    // Determine directory offsets.
+    entry_offset = 0;
+    for (const auto& it : directories) {
+        cur_dir = it.second;
+        cur_dir->entry_offset = entry_offset;
+        entry_offset += sizeof(RomFSDirectoryEntry) +
+                        Common::AlignUp(cur_dir->path_len - cur_dir->cur_path_ofs, 4);
+    }
+    // Assign deferred parent/sibling ownership.
+    for (auto it = directories.rbegin(); it->second != root; ++it) {
+        cur_dir = it->second;
+        cur_dir->sibling = cur_dir->parent->child;
+        cur_dir->parent->child = cur_dir;
+    }
+
+    std::map<u64, VirtualFile> out;
+
+    // Populate file tables.
+    for (const auto& it : files) {
+        cur_file = it.second;
+        RomFSFileEntry cur_entry{};
+
+        cur_entry.parent = cur_file->parent->entry_offset;
+        cur_entry.sibling =
+            cur_file->sibling == nullptr ? ROMFS_ENTRY_EMPTY : cur_file->sibling->entry_offset;
+        cur_entry.offset = cur_file->offset;
+        cur_entry.size = cur_file->size;
+
+        const auto name_size = cur_file->path_len - cur_file->cur_path_ofs;
+        const auto hash = romfs_calc_path_hash(cur_file->parent->entry_offset, cur_file->path,
+                                               cur_file->cur_path_ofs, name_size);
+        cur_entry.hash = file_hash_table[hash % file_hash_table_entry_count];
+        file_hash_table[hash % file_hash_table_entry_count] = cur_file->entry_offset;
+        cur_entry.name_size = name_size;
+
+        // Skip empty files: their key would collide with the next entry, which emplace then drops.
+        if (cur_file->size != 0) {
+            out.emplace(cur_file->offset + ROMFS_FILEPARTITION_OFS, cur_file->source);
+        }
+        std::memcpy(file_table.data() + cur_file->entry_offset, &cur_entry, sizeof(RomFSFileEntry));
+        std::memset(file_table.data() + cur_file->entry_offset + sizeof(RomFSFileEntry), 0,
+                    Common::AlignUp(cur_entry.name_size, 4));
+        std::memcpy(file_table.data() + cur_file->entry_offset + sizeof(RomFSFileEntry),
+                    cur_file->path.data() + cur_file->cur_path_ofs, name_size);
+    }
+
+    // Populate dir tables.
+    for (const auto& it : directories) {
+        cur_dir = it.second;
+        RomFSDirectoryEntry cur_entry{};
+
+        cur_entry.parent = cur_dir == root ? 0 : cur_dir->parent->entry_offset;
+        cur_entry.sibling =
+            cur_dir->sibling == nullptr ? ROMFS_ENTRY_EMPTY : cur_dir->sibling->entry_offset;
+        cur_entry.child =
+            cur_dir->child == nullptr ? ROMFS_ENTRY_EMPTY : cur_dir->child->entry_offset;
+        cur_entry.file = cur_dir->file == nullptr ? ROMFS_ENTRY_EMPTY : cur_dir->file->entry_offset;
+
+        const auto name_size = cur_dir->path_len - cur_dir->cur_path_ofs;
+        const auto hash = romfs_calc_path_hash(cur_dir == root ? 0 : cur_dir->parent->entry_offset,
+                                               cur_dir->path, cur_dir->cur_path_ofs, name_size);
+        cur_entry.hash = dir_hash_table[hash % dir_hash_table_entry_count];
+        dir_hash_table[hash % dir_hash_table_entry_count] = cur_dir->entry_offset;
+
+        cur_entry.name_size = name_size;
+
+        std::memcpy(dir_table.data() + cur_dir->entry_offset, &cur_entry,
+                    sizeof(RomFSDirectoryEntry));
+        std::memset(dir_table.data() + cur_dir->entry_offset + sizeof(RomFSDirectoryEntry), 0,
+                    Common::AlignUp(cur_entry.name_size, 4));
+        std::memcpy(dir_table.data() + cur_dir->entry_offset + sizeof(RomFSDirectoryEntry),
+                    cur_dir->path.data() + cur_dir->cur_path_ofs, name_size);
+    }
+
+    // Set header fields.
+    header.header_size = sizeof(RomFSHeader);
+    header.file_hash_table_size = file_hash_table_size;
+    header.file_table_size = file_table_size;
+    header.dir_hash_table_size = dir_hash_table_size;
+    header.dir_table_size = dir_table_size;
+    header.file_partition_ofs = ROMFS_FILEPARTITION_OFS;
+    header.dir_hash_table_ofs = Common::AlignUp(header.file_partition_ofs + file_partition_size, 4);
+    header.dir_table_ofs = header.dir_hash_table_ofs + header.dir_hash_table_size;
+    header.file_hash_table_ofs = header.dir_table_ofs + header.dir_table_size;
+    header.file_table_ofs = header.file_hash_table_ofs + header.file_hash_table_size;
+
+    std::vector<u8> header_data(sizeof(RomFSHeader));
+    std::memcpy(header_data.data(), &header, header_data.size());
+    out.emplace(0, std::make_shared<VectorVfsFile>(std::move(header_data)));
+
+    std::vector<u8> metadata(file_hash_table_size + file_table_size + dir_hash_table_size +
+                             dir_table_size);
+    std::size_t index = 0;
+    std::memcpy(metadata.data(), dir_hash_table.data(), dir_hash_table.size() * sizeof(u32));
+    index += dir_hash_table.size() * sizeof(u32);
+    std::memcpy(metadata.data() + index, dir_table.data(), dir_table.size());
+    index += dir_table.size();
+    std::memcpy(metadata.data() + index, file_hash_table.data(),
+                file_hash_table.size() * sizeof(u32));
+    index += file_hash_table.size() * sizeof(u32);
+    std::memcpy(metadata.data() + index, file_table.data(), file_table.size());
+    out.emplace(header.dir_hash_table_ofs, std::make_shared<VectorVfsFile>(std::move(metadata)));
+
+    return out;
+}
+
+} // namespace FileSys
diff --git a/src/core/file_sys/fsmitm_romfsbuild.h b/src/core/file_sys/fsmitm_romfsbuild.h
new file mode 100644
index 000000000..b0c3c123b
--- /dev/null
+++ b/src/core/file_sys/fsmitm_romfsbuild.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Adapted by DarkLordZach for use/interaction with yuzu
+ *
+ * Modifications Copyright 2018 yuzu emulator team
+ * Licensed under GPLv2 or any later version
+ * Refer to the license.txt file included.
+ */
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <boost/detail/container_fwd.hpp>
+#include "common/common_types.h"
+#include "core/file_sys/vfs.h"
+
+namespace FileSys {
+
+struct RomFSBuildDirectoryContext;
+struct RomFSBuildFileContext;
+struct RomFSDirectoryEntry;
+struct RomFSFileEntry;
+
+// Collects the directories and files under a VirtualDir and lays them out as a RomFS image.
+class RomFSBuildContext {
+public:
+    explicit RomFSBuildContext(VirtualDir base);
+    ~RomFSBuildContext();
+
+    // This finalizes the context.
+    std::map<u64, VirtualFile> Build();
+
+private:
+    VirtualDir base;
+    std::shared_ptr<RomFSBuildDirectoryContext> root;
+    std::map<std::string, std::shared_ptr<RomFSBuildDirectoryContext>, std::less<>> directories;
+    std::map<std::string, std::shared_ptr<RomFSBuildFileContext>, std::less<>> files;
+    u64 num_dirs = 0;             // Registered directories, including the root
+    u64 num_files = 0;            // Registered files
+    u64 dir_table_size = 0;       // Bytes needed for the directory table
+    u64 file_table_size = 0;      // Bytes needed for the file table
+    u64 dir_hash_table_size = 0;  // Bytes, set by Build()
+    u64 file_hash_table_size = 0; // Bytes, set by Build()
+    u64 file_partition_size = 0;  // Grows in Build() as file contents are placed
+
+    void VisitDirectory(VirtualDir filesys, std::shared_ptr<RomFSBuildDirectoryContext> parent);
+    bool AddDirectory(std::shared_ptr<RomFSBuildDirectoryContext> parent_dir_ctx,
+                      std::shared_ptr<RomFSBuildDirectoryContext> dir_ctx);
+    bool AddFile(std::shared_ptr<RomFSBuildDirectoryContext> parent_dir_ctx,
+                 std::shared_ptr<RomFSBuildFileContext> file_ctx);
+};
+
+} // namespace FileSys
diff --git a/src/core/file_sys/nca_metadata.cpp b/src/core/file_sys/nca_metadata.cpp
index cdfbc5aaf..6f34b7836 100644
--- a/src/core/file_sys/nca_metadata.cpp
+++ b/src/core/file_sys/nca_metadata.cpp
@@ -11,11 +11,11 @@
 namespace FileSys {
 
 bool operator>=(TitleType lhs, TitleType rhs) {
-    return static_cast<size_t>(lhs) >= static_cast<size_t>(rhs);
+    return static_cast<std::size_t>(lhs) >= static_cast<std::size_t>(rhs);
 }
 
 bool operator<=(TitleType lhs, TitleType rhs) {
-    return static_cast<size_t>(lhs) <= static_cast<size_t>(rhs);
+    return static_cast<std::size_t>(lhs) <= static_cast<std::size_t>(rhs);
 }
 
 CNMT::CNMT(VirtualFile file) {
@@ -51,6 +51,8 @@ CNMT::CNMT(CNMTHeader header, OptionalHeader opt_header, std::vector<ContentReco
     : header(std::move(header)), opt_header(std::move(opt_header)),
       content_records(std::move(content_records)), meta_records(std::move(meta_records)) {}
 
+CNMT::~CNMT() = default;
+
 u64 CNMT::GetTitleID() const {
     return header.title_id;
 }
diff --git a/src/core/file_sys/nca_metadata.h b/src/core/file_sys/nca_metadata.h
index da5a8dbe8..a05d155f4 100644
--- a/src/core/file_sys/nca_metadata.h
+++ b/src/core/file_sys/nca_metadata.h
@@ -87,6 +87,7 @@ public:
     explicit CNMT(VirtualFile file);
     CNMT(CNMTHeader header, OptionalHeader opt_header, std::vector<ContentRecord> content_records,
          std::vector<MetaRecord> meta_records);
+    ~CNMT();
 
     u64 GetTitleID() const;
     u32 GetTitleVersion() const;
diff --git a/src/core/file_sys/nca_patch.cpp b/src/core/file_sys/nca_patch.cpp
index 6fc5bd7d8..0090cc6c4 100644
--- a/src/core/file_sys/nca_patch.cpp
+++ b/src/core/file_sys/nca_patch.cpp
@@ -22,11 +22,11 @@ BKTR::BKTR(VirtualFile base_romfs_, VirtualFile bktr_romfs_, RelocationBlock rel
       base_romfs(std::move(base_romfs_)), bktr_romfs(std::move(bktr_romfs_)),
       encrypted(is_encrypted_), key(key_), base_offset(base_offset_), ivfc_offset(ivfc_offset_),
       section_ctr(section_ctr_) {
-    for (size_t i = 0; i < relocation.number_buckets - 1; ++i) {
+    for (std::size_t i = 0; i < relocation.number_buckets - 1; ++i) {
         relocation_buckets[i].entries.push_back({relocation.base_offsets[i + 1], 0, 0});
     }
 
-    for (size_t i = 0; i < subsection.number_buckets - 1; ++i) {
+    for (std::size_t i = 0; i < subsection.number_buckets - 1; ++i) {
         subsection_buckets[i].entries.push_back({subsection_buckets[i + 1].entries[0].address_patch,
                                                  {0},
                                                  subsection_buckets[i + 1].entries[0].ctr});
@@ -37,7 +37,7 @@ BKTR::BKTR(VirtualFile base_romfs_, VirtualFile bktr_romfs_, RelocationBlock rel
 
 BKTR::~BKTR() = default;
 
-size_t BKTR::Read(u8* data, size_t length, size_t offset) const {
+std::size_t BKTR::Read(u8* data, std::size_t length, std::size_t offset) const {
     // Read out of bounds.
     if (offset >= relocation.size)
         return 0;
@@ -69,14 +69,14 @@ size_t BKTR::Read(u8* data, size_t length, size_t offset) const {
     std::vector<u8> iv(16);
     auto subsection_ctr = subsection.ctr;
     auto offset_iv = section_offset + base_offset;
-    for (size_t i = 0; i < section_ctr.size(); ++i)
+    for (std::size_t i = 0; i < section_ctr.size(); ++i)
         iv[i] = section_ctr[0x8 - i - 1];
     offset_iv >>= 4;
-    for (size_t i = 0; i < sizeof(u64); ++i) {
+    for (std::size_t i = 0; i < sizeof(u64); ++i) {
         iv[0xF - i] = static_cast<u8>(offset_iv & 0xFF);
         offset_iv >>= 8;
     }
-    for (size_t i = 0; i < sizeof(u32); ++i) {
+    for (std::size_t i = 0; i < sizeof(u32); ++i) {
         iv[0x7 - i] = static_cast<u8>(subsection_ctr & 0xFF);
         subsection_ctr >>= 8;
     }
@@ -110,8 +110,8 @@ size_t BKTR::Read(u8* data, size_t length, size_t offset) const {
 }
 
 template <bool Subsection, typename BlockType, typename BucketType>
-std::pair<size_t, size_t> BKTR::SearchBucketEntry(u64 offset, BlockType block,
-                                                  BucketType buckets) const {
+std::pair<std::size_t, std::size_t> BKTR::SearchBucketEntry(u64 offset, BlockType block,
+                                                            BucketType buckets) const {
     if constexpr (Subsection) {
         const auto last_bucket = buckets[block.number_buckets - 1];
         if (offset >= last_bucket.entries[last_bucket.number_entries].address_patch)
@@ -120,18 +120,18 @@ std::pair<size_t, size_t> BKTR::SearchBucketEntry(u64 offset, BlockType block,
         ASSERT_MSG(offset <= block.size, "Offset is out of bounds in BKTR relocation block.");
     }
 
-    size_t bucket_id = std::count_if(block.base_offsets.begin() + 1,
-                                     block.base_offsets.begin() + block.number_buckets,
-                                     [&offset](u64 base_offset) { return base_offset <= offset; });
+    std::size_t bucket_id = std::count_if(
+        block.base_offsets.begin() + 1, block.base_offsets.begin() + block.number_buckets,
+        [&offset](u64 base_offset) { return base_offset <= offset; });
 
     const auto bucket = buckets[bucket_id];
 
     if (bucket.number_entries == 1)
         return {bucket_id, 0};
 
-    size_t low = 0;
-    size_t mid = 0;
-    size_t high = bucket.number_entries - 1;
+    std::size_t low = 0;
+    std::size_t mid = 0;
+    std::size_t high = bucket.number_entries - 1;
     while (low <= high) {
         mid = (low + high) / 2;
         if (bucket.entries[mid].address_patch > offset) {
@@ -179,11 +179,11 @@ std::string BKTR::GetName() const {
     return base_romfs->GetName();
 }
 
-size_t BKTR::GetSize() const {
+std::size_t BKTR::GetSize() const {
     return relocation.size;
 }
 
-bool BKTR::Resize(size_t new_size) {
+bool BKTR::Resize(std::size_t new_size) {
     return false;
 }
 
@@ -199,7 +199,7 @@ bool BKTR::IsReadable() const {
     return true;
 }
 
-size_t BKTR::Write(const u8* data, size_t length, size_t offset) {
+std::size_t BKTR::Write(const u8* data, std::size_t length, std::size_t offset) {
     return 0;
 }
 
diff --git a/src/core/file_sys/nca_patch.h b/src/core/file_sys/nca_patch.h
index 381f3504f..8e64e8378 100644
--- a/src/core/file_sys/nca_patch.h
+++ b/src/core/file_sys/nca_patch.h
@@ -98,13 +98,13 @@ public:
          Core::Crypto::Key128 key, u64 base_offset, u64 ivfc_offset, std::array<u8, 8> section_ctr);
     ~BKTR() override;
 
-    size_t Read(u8* data, size_t length, size_t offset) const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
 
     std::string GetName() const override;
 
-    size_t GetSize() const override;
+    std::size_t GetSize() const override;
 
-    bool Resize(size_t new_size) override;
+    bool Resize(std::size_t new_size) override;
 
     std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
 
@@ -112,14 +112,14 @@ public:
 
     bool IsReadable() const override;
 
-    size_t Write(const u8* data, size_t length, size_t offset) override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
 
     bool Rename(std::string_view name) override;
 
 private:
     template <bool Subsection, typename BlockType, typename BucketType>
-    std::pair<size_t, size_t> SearchBucketEntry(u64 offset, BlockType block,
-                                                BucketType buckets) const;
+    std::pair<std::size_t, std::size_t> SearchBucketEntry(u64 offset, BlockType block,
+                                                          BucketType buckets) const;
 
     RelocationEntry GetRelocationEntry(u64 offset) const;
     RelocationEntry GetNextRelocationEntry(u64 offset) const;
diff --git a/src/core/file_sys/partition_filesystem.cpp b/src/core/file_sys/partition_filesystem.cpp
index c377edc9c..5791c76ff 100644
--- a/src/core/file_sys/partition_filesystem.cpp
+++ b/src/core/file_sys/partition_filesystem.cpp
@@ -42,21 +42,21 @@ PartitionFilesystem::PartitionFilesystem(std::shared_ptr<VfsFile> file) {
 
     is_hfs = pfs_header.magic == Common::MakeMagic('H', 'F', 'S', '0');
 
-    size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
-    size_t metadata_size =
+    std::size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
+    std::size_t metadata_size =
         sizeof(Header) + (pfs_header.num_entries * entry_size) + pfs_header.strtab_size;
 
     // Actually read in now...
     std::vector<u8> file_data = file->ReadBytes(metadata_size);
-    const size_t total_size = file_data.size();
+    const std::size_t total_size = file_data.size();
 
     if (total_size != metadata_size) {
         status = Loader::ResultStatus::ErrorIncorrectPFSFileSize;
         return;
     }
 
-    size_t entries_offset = sizeof(Header);
-    size_t strtab_offset = entries_offset + (pfs_header.num_entries * entry_size);
+    std::size_t entries_offset = sizeof(Header);
+    std::size_t strtab_offset = entries_offset + (pfs_header.num_entries * entry_size);
     content_offset = strtab_offset + pfs_header.strtab_size;
     for (u16 i = 0; i < pfs_header.num_entries; i++) {
         FSEntry entry;
@@ -72,6 +72,8 @@ PartitionFilesystem::PartitionFilesystem(std::shared_ptr<VfsFile> file) {
     status = Loader::ResultStatus::Success;
 }
 
+PartitionFilesystem::~PartitionFilesystem() = default;
+
 Loader::ResultStatus PartitionFilesystem::GetStatus() const {
     return status;
 }
diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h
index be7bc32a8..739c63a7f 100644
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -25,6 +25,8 @@ namespace FileSys {
 class PartitionFilesystem : public ReadOnlyVfsDirectory {
 public:
     explicit PartitionFilesystem(std::shared_ptr<VfsFile> file);
+    ~PartitionFilesystem() override;
+
     Loader::ResultStatus GetStatus() const;
 
     std::vector<std::shared_ptr<VfsFile>> GetFiles() const override;
@@ -79,7 +81,7 @@ private:
 
     Header pfs_header{};
     bool is_hfs = false;
-    size_t content_offset = 0;
+    std::size_t content_offset = 0;
 
     std::vector<VirtualFile> pfs_files;
     std::vector<VirtualDir> pfs_dirs;
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 6cecab336..4b3b5e665 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -11,6 +11,7 @@
 #include "core/file_sys/patch_manager.h"
 #include "core/file_sys/registered_cache.h"
 #include "core/file_sys/romfs.h"
+#include "core/file_sys/vfs_layered.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/loader/loader.h"
 
@@ -21,7 +22,7 @@ constexpr u64 SINGLE_BYTE_MODULUS = 0x100;
 std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
     std::array<u8, sizeof(u32)> bytes{};
     bytes[0] = version % SINGLE_BYTE_MODULUS;
-    for (size_t i = 1; i < bytes.size(); ++i) {
+    for (std::size_t i = 1; i < bytes.size(); ++i) {
         version /= SINGLE_BYTE_MODULUS;
         bytes[i] = version % SINGLE_BYTE_MODULUS;
     }
@@ -31,16 +32,19 @@ std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
     return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]);
 }
 
-constexpr std::array<const char*, 1> PATCH_TYPE_NAMES{
+constexpr std::array<const char*, 2> PATCH_TYPE_NAMES{
     "Update",
+    "LayeredFS",
 };
 
 std::string FormatPatchTypeName(PatchType type) {
-    return PATCH_TYPE_NAMES.at(static_cast<size_t>(type));
+    return PATCH_TYPE_NAMES.at(static_cast<std::size_t>(type));
 }
 
 PatchManager::PatchManager(u64 title_id) : title_id(title_id) {}
 
+PatchManager::~PatchManager() = default;
+
 VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
     LOG_INFO(Loader, "Patching ExeFS for title_id={:016X}", title_id);
 
@@ -64,6 +68,44 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
     return exefs;
 }
 
+// Stacks the "romfs" folder of each subdirectory of the title's modification load root (sorted by
+// name) in front of the extracted base RomFS, then repacks the layered result into romfs. romfs is
+// left unchanged if type is not Program, the load root is null or empty, or a step yields nullptr.
+static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type) {
+    const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
+    // GetSize() is unsigned, so an empty load root is detected with == 0.
+    if (type != ContentRecordType::Program || load_dir == nullptr || load_dir->GetSize() == 0)
+        return;
+
+    auto extracted = ExtractRomFS(romfs);
+    if (extracted == nullptr)
+        return;
+
+    auto patch_dirs = load_dir->GetSubdirectories();
+    std::sort(patch_dirs.begin(), patch_dirs.end(),
+              [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
+
+    std::vector<VirtualDir> layers;
+    layers.reserve(patch_dirs.size() + 1);
+    for (const auto& subdir : patch_dirs) {
+        auto romfs_dir = subdir->GetSubdirectory("romfs");
+        if (romfs_dir != nullptr)
+            layers.push_back(std::move(romfs_dir));
+    }
+    layers.push_back(std::move(extracted));
+
+    auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers));
+    if (layered == nullptr)
+        return;
+
+    auto packed = CreateRomFS(std::move(layered));
+    if (packed == nullptr)
+        return;
+
+    LOG_INFO(Loader, "    RomFS: LayeredFS patches applied successfully");
+    romfs = std::move(packed);
+}
+
 VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset,
                                      ContentRecordType type) const {
     LOG_INFO(Loader, "Patching RomFS for title_id={:016X}, type={:02X}", title_id,
@@ -87,6 +129,9 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset,
         }
     }
 
+    // LayeredFS
+    ApplyLayeredFS(romfs, title_id, type);
+
     return romfs;
 }
 
@@ -112,6 +157,10 @@ std::map<PatchType, std::string> PatchManager::GetPatchVersionNames() const {
         }
     }
 
+    const auto lfs_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
+    if (lfs_dir != nullptr && lfs_dir->GetSize() > 0)
+        out.insert_or_assign(PatchType::LayeredFS, "");
+
     return out;
 }
 
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index b521977b2..464f17515 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -26,6 +26,7 @@ std::string FormatTitleVersion(u32 version,
 
 enum class PatchType {
     Update,
+    LayeredFS,
 };
 
 std::string FormatPatchTypeName(PatchType type);
@@ -34,6 +35,7 @@ std::string FormatPatchTypeName(PatchType type);
 class PatchManager {
 public:
     explicit PatchManager(u64 title_id);
+    ~PatchManager();
 
     // Currently tracked ExeFS patches:
     // - Game Updates
@@ -41,6 +43,7 @@ public:
 
     // Currently tracked RomFS patches:
     // - Game Updates
+    // - LayeredFS
     VirtualFile PatchRomFS(VirtualFile base, u64 ivfc_offset,
                            ContentRecordType type = ContentRecordType::Program) const;
 
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index ccb685526..8903ed1d3 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -12,8 +12,12 @@
 
 namespace FileSys {
 
+ProgramMetadata::ProgramMetadata() = default;
+
+ProgramMetadata::~ProgramMetadata() = default;
+
 Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
-    size_t total_size = static_cast<size_t>(file->GetSize());
+    std::size_t total_size = static_cast<std::size_t>(file->GetSize());
     if (total_size < sizeof(Header))
         return Loader::ResultStatus::ErrorBadNPDMHeader;
 
@@ -79,10 +83,12 @@ void ProgramMetadata::Print() const {
 
     auto address_space = "Unknown";
     switch (npdm_header.address_space_type) {
-    case ProgramAddressSpaceType::Is64Bit:
+    case ProgramAddressSpaceType::Is36Bit:
+    case ProgramAddressSpaceType::Is39Bit:
         address_space = "64-bit";
         break;
     case ProgramAddressSpaceType::Is32Bit:
+    case ProgramAddressSpaceType::Is32BitNoMap:
         address_space = "32-bit";
         break;
     }
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 3c0a49f16..e4470d6f0 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -17,8 +17,10 @@ enum class ResultStatus : u16;
 namespace FileSys {
 
 enum class ProgramAddressSpaceType : u8 {
-    Is64Bit = 1,
-    Is32Bit = 2,
+    Is32Bit = 0,
+    Is36Bit = 1,
+    Is32BitNoMap = 2,
+    Is39Bit = 3,
 };
 
 enum class ProgramFilePermission : u64 {
@@ -36,6 +38,9 @@ enum class ProgramFilePermission : u64 {
  */
 class ProgramMetadata {
 public:
+    ProgramMetadata();
+    ~ProgramMetadata();
+
     Loader::ResultStatus Load(VirtualFile file);
 
     bool Is64BitProgram() const;
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index 7361a67be..e9b040689 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -18,6 +18,10 @@
 #include "core/loader/loader.h"
 
 namespace FileSys {
+
+// The size of blocks to use when vfs raw copying into nand.
+constexpr std::size_t VFS_RC_LARGE_COPY_BLOCK = 0x400000;
+
 std::string RegisteredCacheEntry::DebugInfo() const {
     return fmt::format("title_id={:016X}, content_type={:02X}", title_id, static_cast<u8>(type));
 }
@@ -62,11 +66,11 @@ static std::string GetCNMTName(TitleType type, u64 title_id) {
         "" ///< Currently unknown 'DeltaTitle'
     };
 
-    auto index = static_cast<size_t>(type);
+    auto index = static_cast<std::size_t>(type);
     // If the index is after the jump in TitleType, subtract it out.
-    if (index >= static_cast<size_t>(TitleType::Application)) {
-        index -= static_cast<size_t>(TitleType::Application) -
-                 static_cast<size_t>(TitleType::FirmwarePackageB);
+    if (index >= static_cast<std::size_t>(TitleType::Application)) {
+        index -= static_cast<std::size_t>(TitleType::Application) -
+                 static_cast<std::size_t>(TitleType::FirmwarePackageB);
     }
     return fmt::format("{}_{:016x}.cnmt", TITLE_TYPE_NAMES[index], title_id);
 }
@@ -105,7 +109,7 @@ VirtualFile RegisteredCache::OpenFileOrDirectoryConcat(const VirtualDir& dir,
         } else {
             std::vector<VirtualFile> concat;
             // Since the files are a two-digit hex number, max is FF.
-            for (size_t i = 0; i < 0x100; ++i) {
+            for (std::size_t i = 0; i < 0x100; ++i) {
                 auto next = nca_dir->GetFile(fmt::format("{:02X}", i));
                 if (next != nullptr) {
                     concat.push_back(std::move(next));
@@ -121,7 +125,7 @@ VirtualFile RegisteredCache::OpenFileOrDirectoryConcat(const VirtualDir& dir,
             if (concat.empty())
                 return nullptr;
 
-            file = FileSys::ConcatenateFiles(concat);
+            file = ConcatenatedVfsFile::MakeConcatenatedFile(concat, concat.front()->GetName());
         }
 
         return file;
@@ -480,7 +484,8 @@ InstallResult RegisteredCache::RawInstallNCA(std::shared_ptr<NCA> nca, const Vfs
     auto out = dir->CreateFileRelative(path);
     if (out == nullptr)
         return InstallResult::ErrorCopyFailed;
-    return copy(in, out) ? InstallResult::Success : InstallResult::ErrorCopyFailed;
+    return copy(in, out, VFS_RC_LARGE_COPY_BLOCK) ? InstallResult::Success
+                                                  : InstallResult::ErrorCopyFailed;
 }
 
 bool RegisteredCache::RawInstallYuzuMeta(const CNMT& cnmt) {
diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h
index f487b0cf0..c0cd59fc5 100644
--- a/src/core/file_sys/registered_cache.h
+++ b/src/core/file_sys/registered_cache.h
@@ -27,7 +27,7 @@ struct ContentRecord;
 
 using NcaID = std::array<u8, 0x10>;
 using RegisteredCacheParsingFunction = std::function<VirtualFile(const VirtualFile&, const NcaID&)>;
-using VfsCopyFunction = std::function<bool(VirtualFile, VirtualFile)>;
+using VfsCopyFunction = std::function<bool(const VirtualFile&, const VirtualFile&, std::size_t)>;
 
 enum class InstallResult {
     Success,
diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp
index e490c8ace..5910f7046 100644
--- a/src/core/file_sys/romfs.cpp
+++ b/src/core/file_sys/romfs.cpp
@@ -4,8 +4,10 @@
 
 #include "common/common_types.h"
 #include "common/swap.h"
+#include "core/file_sys/fsmitm_romfsbuild.h"
 #include "core/file_sys/romfs.h"
 #include "core/file_sys/vfs.h"
+#include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_offset.h"
 #include "core/file_sys/vfs_vector.h"
 
@@ -49,7 +51,7 @@ struct FileEntry {
 static_assert(sizeof(FileEntry) == 0x20, "FileEntry has incorrect size.");
 
 template <typename Entry>
-static std::pair<Entry, std::string> GetEntry(const VirtualFile& file, size_t offset) {
+static std::pair<Entry, std::string> GetEntry(const VirtualFile& file, std::size_t offset) {
     Entry entry{};
     if (file->ReadObject(&entry, offset) != sizeof(Entry))
         return {};
@@ -59,8 +61,8 @@ static std::pair<Entry, std::string> GetEntry(const VirtualFile& file, size_t of
     return {entry, string};
 }
 
-void ProcessFile(VirtualFile file, size_t file_offset, size_t data_offset, u32 this_file_offset,
-                 std::shared_ptr<VectorVfsDirectory> parent) {
+void ProcessFile(VirtualFile file, std::size_t file_offset, std::size_t data_offset,
+                 u32 this_file_offset, std::shared_ptr<VectorVfsDirectory> parent) {
     while (true) {
         auto entry = GetEntry<FileEntry>(file, file_offset + this_file_offset);
 
@@ -74,8 +76,9 @@ void ProcessFile(VirtualFile file, size_t file_offset, size_t data_offset, u32 t
     }
 }
 
-void ProcessDirectory(VirtualFile file, size_t dir_offset, size_t file_offset, size_t data_offset,
-                      u32 this_dir_offset, std::shared_ptr<VectorVfsDirectory> parent) {
+void ProcessDirectory(VirtualFile file, std::size_t dir_offset, std::size_t file_offset,
+                      std::size_t data_offset, u32 this_dir_offset,
+                      std::shared_ptr<VectorVfsDirectory> parent) {
     while (true) {
         auto entry = GetEntry<DirectoryEntry>(file, dir_offset + this_dir_offset);
         auto current = std::make_shared<VectorVfsDirectory>(
@@ -97,7 +100,7 @@ void ProcessDirectory(VirtualFile file, size_t dir_offset, size_t file_offset, s
     }
 }
 
-VirtualDir ExtractRomFS(VirtualFile file) {
+VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) {
     RomFSHeader header{};
     if (file->ReadObject(&header) != sizeof(RomFSHeader))
         return nullptr;
@@ -116,9 +119,22 @@ VirtualDir ExtractRomFS(VirtualFile file) {
 
     VirtualDir out = std::move(root);
 
-    while (out->GetSubdirectory("") != nullptr)
-        out = out->GetSubdirectory("");
+    while (out->GetSubdirectories().size() == 1 && out->GetFiles().empty()) {
+        if (out->GetSubdirectories().front()->GetName() == "data" &&
+            type == RomFSExtractionType::Truncated)
+            break;
+        out = out->GetSubdirectories().front();
+    }
 
     return out;
 }
+
+VirtualFile CreateRomFS(VirtualDir dir) {
+    if (dir == nullptr)
+        return nullptr;
+
+    RomFSBuildContext ctx{dir};
+    return ConcatenatedVfsFile::MakeConcatenatedFile(0, ctx.Build(), dir->GetName());
+}
+
 } // namespace FileSys
diff --git a/src/core/file_sys/romfs.h b/src/core/file_sys/romfs.h
index e54a7d7a9..ecd1eb725 100644
--- a/src/core/file_sys/romfs.h
+++ b/src/core/file_sys/romfs.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <map>
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
@@ -12,6 +13,8 @@
 
 namespace FileSys {
 
+struct RomFSHeader;
+
 struct IVFCLevel {
     u64_le offset;
     u64_le size;
@@ -29,8 +32,18 @@ struct IVFCHeader {
 };
 static_assert(sizeof(IVFCHeader) == 0xE0, "IVFCHeader has incorrect size.");
 
+enum class RomFSExtractionType {
+    Full,      // Includes data directory
+    Truncated, // Traverses into data directory
+};
+
 // Converts a RomFS binary blob to VFS Filesystem
 // Returns nullptr on failure
-VirtualDir ExtractRomFS(VirtualFile file);
+VirtualDir ExtractRomFS(VirtualFile file,
+                        RomFSExtractionType type = RomFSExtractionType::Truncated);
+
+// Converts a VFS filesystem into a RomFS binary
+// Returns nullptr on failure
+VirtualFile CreateRomFS(VirtualDir dir);
 
 } // namespace FileSys
diff --git a/src/core/file_sys/romfs_factory.cpp b/src/core/file_sys/romfs_factory.cpp
index d9d90939e..d027a8d59 100644
--- a/src/core/file_sys/romfs_factory.cpp
+++ b/src/core/file_sys/romfs_factory.cpp
@@ -28,11 +28,13 @@ RomFSFactory::RomFSFactory(Loader::AppLoader& app_loader) {
     ivfc_offset = app_loader.ReadRomFSIVFCOffset();
 }
 
+RomFSFactory::~RomFSFactory() = default;
+
 ResultVal<VirtualFile> RomFSFactory::OpenCurrentProcess() {
     if (!updatable)
         return MakeResult<VirtualFile>(file);
 
-    const PatchManager patch_manager(Core::CurrentProcess()->program_id);
+    const PatchManager patch_manager(Core::CurrentProcess()->GetTitleID());
     return MakeResult<VirtualFile>(patch_manager.PatchRomFS(file, ivfc_offset));
 }
 
diff --git a/src/core/file_sys/romfs_factory.h b/src/core/file_sys/romfs_factory.h
index 26b8f46cc..2cace8180 100644
--- a/src/core/file_sys/romfs_factory.h
+++ b/src/core/file_sys/romfs_factory.h
@@ -30,6 +30,7 @@ enum class StorageId : u8 {
 class RomFSFactory {
 public:
     explicit RomFSFactory(Loader::AppLoader& app_loader);
+    ~RomFSFactory();
 
     ResultVal<VirtualFile> OpenCurrentProcess();
     ResultVal<VirtualFile> Open(u64 title_id, StorageId storage, ContentRecordType type);
diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp
index e437d34e5..47f2ab9e0 100644
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -20,6 +20,8 @@ std::string SaveDataDescriptor::DebugInfo() const {
 
 SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save_directory)) {}
 
+SaveDataFactory::~SaveDataFactory() = default;
+
 ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, SaveDataDescriptor meta) {
     if (meta.type == SaveDataType::SystemSaveData || meta.type == SaveDataType::SaveData) {
         if (meta.zero_1 != 0) {
@@ -79,16 +81,16 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
     // According to switchbrew, if a save is of type SaveData and the title id field is 0, it should
     // be interpreted as the title id of the current process.
     if (type == SaveDataType::SaveData && title_id == 0)
-        title_id = Core::CurrentProcess()->program_id;
+        title_id = Core::CurrentProcess()->GetTitleID();
 
     std::string out;
 
     switch (space) {
     case SaveDataSpaceId::NandSystem:
-        out = "/system/save/";
+        out = "/system/";
         break;
     case SaveDataSpaceId::NandUser:
-        out = "/user/save/";
+        out = "/user/";
         break;
     default:
         ASSERT_MSG(false, "Unrecognized SaveDataSpaceId: {:02X}", static_cast<u8>(space));
@@ -96,9 +98,12 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
 
     switch (type) {
     case SaveDataType::SystemSaveData:
-        return fmt::format("{}{:016X}/{:016X}{:016X}", out, save_id, user_id[1], user_id[0]);
+        return fmt::format("{}save/{:016X}/{:016X}{:016X}", out, save_id, user_id[1], user_id[0]);
     case SaveDataType::SaveData:
-        return fmt::format("{}{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
+        return fmt::format("{}save/{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
+                           title_id);
+    case SaveDataType::TemporaryStorage:
+        return fmt::format("{}temp/{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
                            title_id);
     default:
         ASSERT_MSG(false, "Unrecognized SaveDataType: {:02X}", static_cast<u8>(type));
diff --git a/src/core/file_sys/savedata_factory.h b/src/core/file_sys/savedata_factory.h
index ba978695b..d69ef6741 100644
--- a/src/core/file_sys/savedata_factory.h
+++ b/src/core/file_sys/savedata_factory.h
@@ -48,6 +48,7 @@ static_assert(sizeof(SaveDataDescriptor) == 0x40, "SaveDataDescriptor has incorr
 class SaveDataFactory {
 public:
     explicit SaveDataFactory(VirtualDir dir);
+    ~SaveDataFactory();
 
     ResultVal<VirtualDir> Open(SaveDataSpaceId space, SaveDataDescriptor meta);
 
diff --git a/src/core/file_sys/submission_package.h b/src/core/file_sys/submission_package.h
index 1120a4920..e85a2b76e 100644
--- a/src/core/file_sys/submission_package.h
+++ b/src/core/file_sys/submission_package.h
@@ -24,7 +24,7 @@ enum class ContentRecordType : u8;
 class NSP : public ReadOnlyVfsDirectory {
 public:
     explicit NSP(VirtualFile file);
-    ~NSP();
+    ~NSP() override;
 
     Loader::ResultStatus GetStatus() const;
     Loader::ResultStatus GetProgramStatus(u64 title_id) const;
diff --git a/src/core/file_sys/vfs.cpp b/src/core/file_sys/vfs.cpp
index 146c839f4..bfe50da73 100644
--- a/src/core/file_sys/vfs.cpp
+++ b/src/core/file_sys/vfs.cpp
@@ -167,18 +167,18 @@ std::string VfsFile::GetExtension() const {
 
 VfsDirectory::~VfsDirectory() = default;
 
-boost::optional<u8> VfsFile::ReadByte(size_t offset) const {
+boost::optional<u8> VfsFile::ReadByte(std::size_t offset) const {
     u8 out{};
-    size_t size = Read(&out, 1, offset);
+    std::size_t size = Read(&out, 1, offset);
     if (size == 1)
         return out;
 
     return boost::none;
 }
 
-std::vector<u8> VfsFile::ReadBytes(size_t size, size_t offset) const {
+std::vector<u8> VfsFile::ReadBytes(std::size_t size, std::size_t offset) const {
     std::vector<u8> out(size);
-    size_t read_size = Read(out.data(), size, offset);
+    std::size_t read_size = Read(out.data(), size, offset);
     out.resize(read_size);
     return out;
 }
@@ -187,11 +187,11 @@ std::vector<u8> VfsFile::ReadAllBytes() const {
     return ReadBytes(GetSize());
 }
 
-bool VfsFile::WriteByte(u8 data, size_t offset) {
+bool VfsFile::WriteByte(u8 data, std::size_t offset) {
     return Write(&data, 1, offset) == 1;
 }
 
-size_t VfsFile::WriteBytes(const std::vector<u8>& data, size_t offset) {
+std::size_t VfsFile::WriteBytes(const std::vector<u8>& data, std::size_t offset) {
     return Write(data.data(), data.size(), offset);
 }
 
@@ -215,7 +215,7 @@ std::shared_ptr<VfsFile> VfsDirectory::GetFileRelative(std::string_view path) co
     }
 
     auto dir = GetSubdirectory(vec[0]);
-    for (size_t component = 1; component < vec.size() - 1; ++component) {
+    for (std::size_t component = 1; component < vec.size() - 1; ++component) {
         if (dir == nullptr) {
             return nullptr;
         }
@@ -249,7 +249,7 @@ std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryRelative(std::string_vie
     }
 
     auto dir = GetSubdirectory(vec[0]);
-    for (size_t component = 1; component < vec.size(); ++component) {
+    for (std::size_t component = 1; component < vec.size(); ++component) {
         if (dir == nullptr) {
             return nullptr;
         }
@@ -286,7 +286,7 @@ bool VfsDirectory::IsRoot() const {
     return GetParentDirectory() == nullptr;
 }
 
-size_t VfsDirectory::GetSize() const {
+std::size_t VfsDirectory::GetSize() const {
     const auto& files = GetFiles();
     const auto sum_sizes = [](const auto& range) {
         return std::accumulate(range.begin(), range.end(), 0ULL,
@@ -399,6 +399,15 @@ bool VfsDirectory::Copy(std::string_view src, std::string_view dest) {
     return f2->WriteBytes(f1->ReadAllBytes()) == f1->GetSize();
 }
 
+std::map<std::string, VfsEntryType, std::less<>> VfsDirectory::GetEntries() const {
+    std::map<std::string, VfsEntryType, std::less<>> out; // entry name -> entry kind
+    for (const auto& dir : GetSubdirectories())
+        out.emplace(dir->GetName(), VfsEntryType::Directory);
+    for (const auto& file : GetFiles())
+        out.emplace(file->GetName(), VfsEntryType::File); // emplace keeps an existing entry
+    return out;
+}
+
 std::string VfsDirectory::GetFullPath() const {
     if (IsRoot())
         return GetName();
@@ -434,13 +443,13 @@ bool ReadOnlyVfsDirectory::Rename(std::string_view name) {
     return false;
 }
 
-bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2, size_t block_size) {
+bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2, std::size_t block_size) {
     if (file1->GetSize() != file2->GetSize())
         return false;
 
     std::vector<u8> f1_v(block_size);
     std::vector<u8> f2_v(block_size);
-    for (size_t i = 0; i < file1->GetSize(); i += block_size) {
+    for (std::size_t i = 0; i < file1->GetSize(); i += block_size) {
         auto f1_vs = file1->Read(f1_v.data(), block_size, i);
         auto f2_vs = file2->Read(f2_v.data(), block_size, i);
 
@@ -454,13 +463,41 @@ bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2, size_t block
     return true;
 }
 
-bool VfsRawCopy(VirtualFile src, VirtualFile dest) {
-    if (src == nullptr || dest == nullptr)
+bool VfsRawCopy(const VirtualFile& src, const VirtualFile& dest, std::size_t block_size) {
+    if (src == nullptr || dest == nullptr || !src->IsReadable() || !dest->IsWritable())
         return false;
     if (!dest->Resize(src->GetSize()))
         return false;
-    std::vector<u8> data = src->ReadAllBytes();
-    return dest->WriteBytes(data, 0) == data.size();
+    // Copy in bounded chunks; a block_size of 0 is clamped so the loop always advances.
+    const std::size_t step = std::max<std::size_t>(block_size, 1);
+    std::vector<u8> temp(std::min(step, src->GetSize()));
+    for (std::size_t i = 0; i < src->GetSize(); i += step) {
+        const auto count = std::min(step, src->GetSize() - i);
+        // A short read would leave stale bytes in temp, so treat it as a failed copy.
+        if (src->Read(temp.data(), count, i) != count ||
+            dest->Write(temp.data(), count, i) != count)
+            return false;
+    }
+    return true;
+}
+
+bool VfsRawCopyD(const VirtualDir& src, const VirtualDir& dest, std::size_t block_size) {
+    if (!src || !dest || !src->IsReadable() || !dest->IsWritable())
+        return false;
+
+    // Files directly inside src come first; a null result from CreateFile is rejected by
+    // VfsRawCopy, so a failed creation aborts the whole copy.
+    for (const auto& source_file : src->GetFiles()) {
+        if (!VfsRawCopy(source_file, dest->CreateFile(source_file->GetName()), block_size))
+            return false;
+    }
+
+    // Then mirror each subdirectory recursively, stopping at the first failure.
+    for (const auto& source_dir : src->GetSubdirectories()) {
+        if (!VfsRawCopyD(source_dir, dest->CreateSubdirectory(source_dir->GetName()), block_size))
+            return false;
+    }
+    return true;
 }
 
 VirtualDir GetOrCreateDirectoryRelative(const VirtualDir& rel, std::string_view path) {
diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h
index 5142a3e86..270291631 100644
--- a/src/core/file_sys/vfs.h
+++ b/src/core/file_sys/vfs.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <map>
 #include <memory>
 #include <string>
 #include <string_view>
@@ -92,9 +93,9 @@ public:
     // Retrieves the extension of the file name.
     virtual std::string GetExtension() const;
     // Retrieves the size of the file.
-    virtual size_t GetSize() const = 0;
+    virtual std::size_t GetSize() const = 0;
     // Resizes the file to new_size. Returns whether or not the operation was successful.
-    virtual bool Resize(size_t new_size) = 0;
+    virtual bool Resize(std::size_t new_size) = 0;
     // Gets a pointer to the directory containing this file, returning nullptr if there is none.
     virtual std::shared_ptr<VfsDirectory> GetContainingDirectory() const = 0;
 
@@ -105,15 +106,15 @@ public:
 
     // The primary method of reading from the file. Reads length bytes into data starting at offset
     // into file. Returns number of bytes successfully read.
-    virtual size_t Read(u8* data, size_t length, size_t offset = 0) const = 0;
+    virtual std::size_t Read(u8* data, std::size_t length, std::size_t offset = 0) const = 0;
     // The primary method of writing to the file. Writes length bytes from data starting at offset
     // into file. Returns number of bytes successfully written.
-    virtual size_t Write(const u8* data, size_t length, size_t offset = 0) = 0;
+    virtual std::size_t Write(const u8* data, std::size_t length, std::size_t offset = 0) = 0;
 
     // Reads exactly one byte at the offset provided, returning boost::none on error.
-    virtual boost::optional<u8> ReadByte(size_t offset = 0) const;
+    virtual boost::optional<u8> ReadByte(std::size_t offset = 0) const;
     // Reads size bytes starting at offset in file into a vector.
-    virtual std::vector<u8> ReadBytes(size_t size, size_t offset = 0) const;
+    virtual std::vector<u8> ReadBytes(std::size_t size, std::size_t offset = 0) const;
     // Reads all the bytes from the file into a vector. Equivalent to 'file->Read(file->GetSize(),
     // 0)'
     virtual std::vector<u8> ReadAllBytes() const;
@@ -121,7 +122,7 @@ public:
     // Reads an array of type T, size number_elements starting at offset.
     // Returns the number of bytes (sizeof(T)*number_elements) read successfully.
     template <typename T>
-    size_t ReadArray(T* data, size_t number_elements, size_t offset = 0) const {
+    std::size_t ReadArray(T* data, std::size_t number_elements, std::size_t offset = 0) const {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
 
         return Read(reinterpret_cast<u8*>(data), number_elements * sizeof(T), offset);
@@ -130,7 +131,7 @@ public:
     // Reads size bytes into the memory starting at data starting at offset into the file.
     // Returns the number of bytes read successfully.
     template <typename T>
-    size_t ReadBytes(T* data, size_t size, size_t offset = 0) const {
+    std::size_t ReadBytes(T* data, std::size_t size, std::size_t offset = 0) const {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
         return Read(reinterpret_cast<u8*>(data), size, offset);
     }
@@ -138,22 +139,22 @@ public:
     // Reads one object of type T starting at offset in file.
     // Returns the number of bytes read successfully (sizeof(T)).
     template <typename T>
-    size_t ReadObject(T* data, size_t offset = 0) const {
+    std::size_t ReadObject(T* data, std::size_t offset = 0) const {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
         return Read(reinterpret_cast<u8*>(data), sizeof(T), offset);
     }
 
-    // Writes exactly one byte to offset in file and retuns whether or not the byte was written
+    // Writes exactly one byte to offset in file and returns whether or not the byte was written
     // successfully.
-    virtual bool WriteByte(u8 data, size_t offset = 0);
+    virtual bool WriteByte(u8 data, std::size_t offset = 0);
     // Writes a vector of bytes to offset in file and returns the number of bytes successfully
     // written.
-    virtual size_t WriteBytes(const std::vector<u8>& data, size_t offset = 0);
+    virtual std::size_t WriteBytes(const std::vector<u8>& data, std::size_t offset = 0);
 
     // Writes an array of type T, size number_elements to offset in file.
     // Returns the number of bytes (sizeof(T)*number_elements) written successfully.
     template <typename T>
-    size_t WriteArray(const T* data, size_t number_elements, size_t offset = 0) {
+    std::size_t WriteArray(const T* data, std::size_t number_elements, std::size_t offset = 0) {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
-        return Write(data, number_elements * sizeof(T), offset);
+        return Write(reinterpret_cast<const u8*>(data), number_elements * sizeof(T), offset);
     }
@@ -161,7 +162,7 @@ public:
     // Writes size bytes starting at memory location data to offset in file.
     // Returns the number of bytes written successfully.
     template <typename T>
-    size_t WriteBytes(const T* data, size_t size, size_t offset = 0) {
+    std::size_t WriteBytes(const T* data, std::size_t size, std::size_t offset = 0) {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
         return Write(reinterpret_cast<const u8*>(data), size, offset);
     }
@@ -169,7 +170,7 @@ public:
     // Writes one object of type T to offset in file.
     // Returns the number of bytes written successfully (sizeof(T)).
     template <typename T>
-    size_t WriteObject(const T& data, size_t offset = 0) {
+    std::size_t WriteObject(const T& data, std::size_t offset = 0) {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
-        return Write(&data, sizeof(T), offset);
+        return Write(reinterpret_cast<const u8*>(&data), sizeof(T), offset); // raw byte view
     }
@@ -221,7 +222,7 @@ public:
     // Returns the name of the directory.
     virtual std::string GetName() const = 0;
     // Returns the total size of all files and subdirectories in this directory.
-    virtual size_t GetSize() const;
+    virtual std::size_t GetSize() const;
     // Returns the parent directory of this directory. Returns nullptr if this directory is root or
     // has no parent.
     virtual std::shared_ptr<VfsDirectory> GetParentDirectory() const = 0;
@@ -265,6 +266,10 @@ public:
     // dest.
     virtual bool Copy(std::string_view src, std::string_view dest);
 
+    // Gets all of the entries directly in the directory (files and dirs), returning a map between
+    // item name -> type.
+    virtual std::map<std::string, VfsEntryType, std::less<>> GetEntries() const;
+
     // Interprets the file with name file instead as a directory of type directory.
     // The directory must have a constructor that takes a single argument of type
     // std::shared_ptr<VfsFile>. Allows to reinterpret container files (i.e NCA, zip, XCI, etc) as a
@@ -310,13 +315,19 @@ public:
     bool Rename(std::string_view name) override;
 };
 
-// Compare the two files, byte-for-byte, in increments specificed by block_size
-bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2, size_t block_size = 0x200);
+// Compare the two files, byte-for-byte, in increments specified by block_size
+bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2,
+                std::size_t block_size = 0x1000);
 
 // A method that copies the raw data between two different implementations of VirtualFile. If you
 // are using the same implementation, it is probably better to use the Copy method in the parent
 // directory of src/dest.
-bool VfsRawCopy(VirtualFile src, VirtualFile dest);
+bool VfsRawCopy(const VirtualFile& src, const VirtualFile& dest, std::size_t block_size = 0x1000);
+
+// A method that performs a similar function to VfsRawCopy above, but instead copies entire
+// directories. It suffers the same performance penalties as above and an implementation-specific
+// Copy should always be preferred.
+bool VfsRawCopyD(const VirtualDir& src, const VirtualDir& dest, std::size_t block_size = 0x1000);
 
 // Checks if the directory at path relative to rel exists. If it does, returns that. If it does not
 // it attempts to create it and returns the new dir or nullptr on failure.
diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp
index e6bf586a3..16d801c0c 100644
--- a/src/core/file_sys/vfs_concat.cpp
+++ b/src/core/file_sys/vfs_concat.cpp
@@ -5,28 +5,75 @@
 #include <algorithm>
 #include <utility>
 
+#include "common/assert.h"
 #include "core/file_sys/vfs_concat.h"
+#include "core/file_sys/vfs_static.h"
 
 namespace FileSys {
 
-VirtualFile ConcatenateFiles(std::vector<VirtualFile> files, std::string name) {
-    if (files.empty())
-        return nullptr;
-    if (files.size() == 1)
-        return files[0];
+static bool VerifyConcatenationMapContinuity(const std::map<u64, VirtualFile>& map) {
+    // An empty map has no last entry to step back to; a map not starting at 0 is invalid too.
+    if (map.empty() || map.begin()->first != 0)
+        return false;
+    for (auto prev = map.begin(), iter = ++map.begin(); iter != map.end(); prev = iter++) {
+        if (prev->first + prev->second->GetSize() != iter->first) // gap or overlap
+            return false;
+    }
 
-    return std::shared_ptr<VfsFile>(new ConcatenatedVfsFile(std::move(files), std::move(name)));
+    return true;
 }
 
 ConcatenatedVfsFile::ConcatenatedVfsFile(std::vector<VirtualFile> files_, std::string name)
     : name(std::move(name)) {
-    size_t next_offset = 0;
+    std::size_t next_offset = 0;
     for (const auto& file : files_) {
         files[next_offset] = file;
         next_offset += file->GetSize();
     }
 }
 
+ConcatenatedVfsFile::ConcatenatedVfsFile(std::map<u64, VirtualFile> files_, std::string name)
+    : files(std::move(files_)), name(std::move(name)) {
+    ASSERT(VerifyConcatenationMapContinuity(files)); // entries must be gap-free and start at 0
+}
+
+ConcatenatedVfsFile::~ConcatenatedVfsFile() = default;
+
+VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(std::vector<VirtualFile> files,
+                                                      std::string name) {
+    // Nothing to wrap for zero files; a lone file is handed back as-is (name is unused then).
+    if (files.size() <= 1)
+        return files.empty() ? nullptr : files.front();
+
+    // The constructor is private, so the object is created with new rather than make_shared.
+    return std::shared_ptr<VfsFile>(new ConcatenatedVfsFile(std::move(files), std::move(name)));
+}
+
+VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte,
+                                                      std::map<u64, VirtualFile> files,
+                                                      std::string name) {
+    if (files.empty())
+        return nullptr;
+    // A lone file may only stand in for the result if it needs no leading padding.
+    if (files.size() == 1 && files.begin()->first == 0)
+        return files.begin()->second;
+
+    // std::map insertion does not invalidate iterators, so fillers can be added mid-walk.
+    const auto last_valid = --files.end();
+    for (auto iter = files.begin(); iter != last_valid;) {
+        const auto old = iter++;
+        const u64 tail = old->first + old->second->GetSize();
+        if (tail != iter->first)
+            files.emplace(tail, std::make_shared<StaticVfsFile>(filler_byte, iter->first - tail));
+    }
+
+    // Ensure the map starts at offset 0 (start of file), otherwise pad to fill.
+    if (files.begin()->first != 0)
+        files.emplace(0, std::make_shared<StaticVfsFile>(filler_byte, files.begin()->first));
+
+    return std::shared_ptr<VfsFile>(new ConcatenatedVfsFile(std::move(files), std::move(name)));
+}
+
 std::string ConcatenatedVfsFile::GetName() const {
     if (files.empty())
         return "";
@@ -35,13 +82,13 @@ std::string ConcatenatedVfsFile::GetName() const {
     return files.begin()->second->GetName();
 }
 
-size_t ConcatenatedVfsFile::GetSize() const {
+std::size_t ConcatenatedVfsFile::GetSize() const {
     if (files.empty())
         return 0;
     return files.rbegin()->first + files.rbegin()->second->GetSize();
 }
 
-bool ConcatenatedVfsFile::Resize(size_t new_size) {
+bool ConcatenatedVfsFile::Resize(std::size_t new_size) {
     return false;
 }
 
@@ -59,8 +106,8 @@ bool ConcatenatedVfsFile::IsReadable() const {
     return true;
 }
 
-size_t ConcatenatedVfsFile::Read(u8* data, size_t length, size_t offset) const {
-    auto entry = files.end();
+std::size_t ConcatenatedVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const {
+    auto entry = --files.end();
     for (auto iter = files.begin(); iter != files.end(); ++iter) {
         if (iter->first > offset) {
             entry = --iter;
@@ -68,27 +115,25 @@ size_t ConcatenatedVfsFile::Read(u8* data, size_t length, size_t offset) const {
         }
     }
 
-    // Check if the entry should be the last one. The loop above will make it end().
-    if (entry == files.end() && offset < files.rbegin()->first + files.rbegin()->second->GetSize())
-        --entry;
-
-    if (entry == files.end())
+    if (entry->first + entry->second->GetSize() <= offset)
         return 0;
 
-    const auto remaining = entry->second->GetSize() + offset - entry->first;
-    if (length > remaining) {
-        return entry->second->Read(data, remaining, offset - entry->first) +
-               Read(data + remaining, length - remaining, offset + remaining);
+    const auto read_in =
+        std::min<u64>(entry->first + entry->second->GetSize() - offset, entry->second->GetSize());
+    if (length > read_in) {
+        return entry->second->Read(data, read_in, offset - entry->first) +
+               Read(data + read_in, length - read_in, offset + read_in);
     }
 
-    return entry->second->Read(data, length, offset - entry->first);
+    return entry->second->Read(data, std::min<u64>(read_in, length), offset - entry->first);
 }
 
-size_t ConcatenatedVfsFile::Write(const u8* data, size_t length, size_t offset) {
+std::size_t ConcatenatedVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) {
     return 0;
 }
 
 bool ConcatenatedVfsFile::Rename(std::string_view name) {
     return false;
 }
+
 } // namespace FileSys
diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h
index 686d32515..c90f9d5d1 100644
--- a/src/core/file_sys/vfs_concat.h
+++ b/src/core/file_sys/vfs_concat.h
@@ -4,37 +4,43 @@
 
 #pragma once
 
+#include <map>
 #include <memory>
 #include <string_view>
-#include <boost/container/flat_map.hpp>
 #include "core/file_sys/vfs.h"
 
 namespace FileSys {
 
-// Wrapper function to allow for more efficient handling of files.size() == 0, 1 cases.
-VirtualFile ConcatenateFiles(std::vector<VirtualFile> files, std::string name = "");
-
 // Class that wraps multiple vfs files and concatenates them, making reads seamless. Currently
 // read-only.
 class ConcatenatedVfsFile : public VfsFile {
-    friend VirtualFile ConcatenateFiles(std::vector<VirtualFile> files, std::string name);
-
     ConcatenatedVfsFile(std::vector<VirtualFile> files, std::string name);
+    ConcatenatedVfsFile(std::map<u64, VirtualFile> files, std::string name);
 
 public:
+    ~ConcatenatedVfsFile() override;
+
+    /// Wrapper function to allow for more efficient handling of files.size() == 0, 1 cases.
+    static VirtualFile MakeConcatenatedFile(std::vector<VirtualFile> files, std::string name);
+
+    /// Convenience function that turns a map of offsets to files into a concatenated file, filling
+    /// gaps with a given filler byte.
+    static VirtualFile MakeConcatenatedFile(u8 filler_byte, std::map<u64, VirtualFile> files,
+                                            std::string name);
+
     std::string GetName() const override;
-    size_t GetSize() const override;
-    bool Resize(size_t new_size) override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
     std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
     bool IsWritable() const override;
     bool IsReadable() const override;
-    size_t Read(u8* data, size_t length, size_t offset) const override;
-    size_t Write(const u8* data, size_t length, size_t offset) override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
     bool Rename(std::string_view name) override;
 
 private:
     // Maps starting offset to file -- more efficient.
-    boost::container::flat_map<u64, VirtualFile> files;
+    std::map<u64, VirtualFile> files;
     std::string name;
 };
 
diff --git a/src/core/file_sys/vfs_layered.cpp b/src/core/file_sys/vfs_layered.cpp
new file mode 100644
index 000000000..bfee01725
--- /dev/null
+++ b/src/core/file_sys/vfs_layered.cpp
@@ -0,0 +1,132 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <utility>
+#include "core/file_sys/vfs_layered.h"
+
+namespace FileSys {
+
+LayeredVfsDirectory::LayeredVfsDirectory(std::vector<VirtualDir> dirs_, std::string name_)
+    : dirs(std::move(dirs_)), name(std::move(name_)) {}
+
+LayeredVfsDirectory::~LayeredVfsDirectory() = default;
+
+VirtualDir LayeredVfsDirectory::MakeLayeredDirectory(std::vector<VirtualDir> dirs,
+                                                     std::string name) {
+    // No layers means no directory, and a single layer needs no wrapper (name is unused then).
+    if (dirs.size() <= 1)
+        return dirs.empty() ? nullptr : dirs.front();
+
+    // The constructor is private, so the object is created with new rather than make_shared.
+    return std::shared_ptr<VfsDirectory>(new LayeredVfsDirectory(std::move(dirs), std::move(name)));
+}
+
+std::shared_ptr<VfsFile> LayeredVfsDirectory::GetFileRelative(std::string_view path) const {
+    // Layers are searched front to back, so the first layer providing the path wins.
+    for (const auto& layer : dirs) {
+        if (auto found = layer->GetFileRelative(path))
+            return found;
+    }
+
+    return nullptr;
+}
+
+std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetDirectoryRelative(
+    std::string_view path) const {
+    // Gather the directory from every layer that has it, keeping the layer priority order.
+    std::vector<VirtualDir> matches;
+    for (const auto& layer : dirs) {
+        if (auto candidate = layer->GetDirectoryRelative(path))
+            matches.push_back(std::move(candidate));
+    }
+
+    return MakeLayeredDirectory(std::move(matches));
+}
+
+std::shared_ptr<VfsFile> LayeredVfsDirectory::GetFile(std::string_view name) const {
+    return GetFileRelative(name); // same first-match-wins lookup as a relative path
+}
+
+std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetSubdirectory(std::string_view name) const {
+    return GetDirectoryRelative(name); // merges every layer that has this subdirectory
+}
+
+std::string LayeredVfsDirectory::GetFullPath() const {
+    return dirs.front()->GetFullPath(); // the highest-priority layer speaks for the stack
+}
+
+std::vector<std::shared_ptr<VfsFile>> LayeredVfsDirectory::GetFiles() const {
+    // A file name already supplied by an earlier (higher priority) layer hides later ones.
+    std::vector<VirtualFile> merged;
+    for (const auto& layer : dirs) {
+        for (const auto& candidate : layer->GetFiles()) {
+            const auto same_name = [&candidate](const VirtualFile& taken) {
+                return taken->GetName() == candidate->GetName();
+            };
+            if (std::none_of(merged.begin(), merged.end(), same_name))
+                merged.push_back(candidate);
+        }
+    }
+    return merged;
+}
+
+std::vector<std::shared_ptr<VfsDirectory>> LayeredVfsDirectory::GetSubdirectories() const {
+    // Pass 1: collect every distinct subdirectory name in the order the layers present them.
+    std::vector<std::string> seen;
+    for (const auto& layer : dirs) {
+        for (const auto& child : layer->GetSubdirectories()) {
+            if (std::find(seen.begin(), seen.end(), child->GetName()) == seen.end())
+                seen.push_back(child->GetName());
+        }
+    }
+    // Pass 2: resolve each name through GetSubdirectory so all layers holding it are merged.
+    std::vector<VirtualDir> merged;
+    merged.reserve(seen.size());
+    for (const auto& dir_name : seen)
+        merged.push_back(GetSubdirectory(dir_name));
+    return merged;
+}
+
+bool LayeredVfsDirectory::IsWritable() const {
+    return false; // the merged view never accepts modifications to its layers
+}
+
+bool LayeredVfsDirectory::IsReadable() const {
+    return true; // reads are always served by delegating to the layers
+}
+
+std::string LayeredVfsDirectory::GetName() const {
+    return !name.empty() ? name : dirs.front()->GetName(); // fall back to the top layer's name
+}
+
+std::shared_ptr<VfsDirectory> LayeredVfsDirectory::GetParentDirectory() const {
+    return dirs.front()->GetParentDirectory(); // parent is taken from the top layer only
+}
+
+std::shared_ptr<VfsDirectory> LayeredVfsDirectory::CreateSubdirectory(std::string_view name) {
+    return nullptr; // creating entries is unsupported on a layered directory
+}
+
+std::shared_ptr<VfsFile> LayeredVfsDirectory::CreateFile(std::string_view name) {
+    return nullptr; // creating entries is unsupported on a layered directory
+}
+
+bool LayeredVfsDirectory::DeleteSubdirectory(std::string_view name) {
+    return false; // deleting entries is unsupported on a layered directory
+}
+
+bool LayeredVfsDirectory::DeleteFile(std::string_view name) {
+    return false; // deleting entries is unsupported on a layered directory
+}
+
+bool LayeredVfsDirectory::Rename(std::string_view name_) {
+    name = std::string(name_); // only the name reported by GetName changes, not the layers
+    return true;
+}
+
+bool LayeredVfsDirectory::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
+    return false; // replacement is unsupported on a layered directory
+}
+} // namespace FileSys
diff --git a/src/core/file_sys/vfs_layered.h b/src/core/file_sys/vfs_layered.h
new file mode 100644
index 000000000..d85310f57
--- /dev/null
+++ b/src/core/file_sys/vfs_layered.h
@@ -0,0 +1,50 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include "core/file_sys/vfs.h"
+
+namespace FileSys {
+
+// Class that stacks multiple VfsDirectories on top of each other, attempting to read from the first
+// one and falling back to the one after. The highest priority directory (overwrites all others)
+// should be element 0 in the dirs vector.
+class LayeredVfsDirectory : public VfsDirectory {
+    LayeredVfsDirectory(std::vector<VirtualDir> dirs, std::string name); // private ctor
+
+public:
+    ~LayeredVfsDirectory() override;
+
+    /// Wrapper function to allow for more efficient handling of dirs.size() == 0, 1 cases.
+    static VirtualDir MakeLayeredDirectory(std::vector<VirtualDir> dirs, std::string name = "");
+
+    std::shared_ptr<VfsFile> GetFileRelative(std::string_view path) const override; // first hit
+    std::shared_ptr<VfsDirectory> GetDirectoryRelative(std::string_view path) const override;
+    std::shared_ptr<VfsFile> GetFile(std::string_view name) const override;
+    std::shared_ptr<VfsDirectory> GetSubdirectory(std::string_view name) const override;
+    std::string GetFullPath() const override; // taken from dirs[0]
+
+    std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; // deduplicated by name
+    std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; // by name
+    bool IsWritable() const override; // always false
+    bool IsReadable() const override; // always true
+    std::string GetName() const override; // override name, else dirs[0]'s name
+    std::shared_ptr<VfsDirectory> GetParentDirectory() const override; // taken from dirs[0]
+    std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; // nullptr
+    std::shared_ptr<VfsFile> CreateFile(std::string_view name) override; // unsupported, nullptr
+    bool DeleteSubdirectory(std::string_view name) override; // unsupported, false
+    bool DeleteFile(std::string_view name) override; // unsupported, false
+    bool Rename(std::string_view name) override; // changes GetName() only
+
+protected:
+    bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override; // always false
+
+private:
+    std::vector<VirtualDir> dirs; // layers, highest priority first
+    std::string name; // empty => use dirs[0]'s name
+};
+
+} // namespace FileSys
diff --git a/src/core/file_sys/vfs_offset.cpp b/src/core/file_sys/vfs_offset.cpp
index 847cde2f5..a4c6719a0 100644
--- a/src/core/file_sys/vfs_offset.cpp
+++ b/src/core/file_sys/vfs_offset.cpp
@@ -9,20 +9,22 @@
 
 namespace FileSys {
 
-OffsetVfsFile::OffsetVfsFile(std::shared_ptr<VfsFile> file_, size_t size_, size_t offset_,
+OffsetVfsFile::OffsetVfsFile(std::shared_ptr<VfsFile> file_, std::size_t size_, std::size_t offset_,
                              std::string name_, VirtualDir parent_)
     : file(file_), offset(offset_), size(size_), name(std::move(name_)),
       parent(parent_ == nullptr ? file->GetContainingDirectory() : std::move(parent_)) {}
 
+OffsetVfsFile::~OffsetVfsFile() = default;
+
 std::string OffsetVfsFile::GetName() const {
     return name.empty() ? file->GetName() : name;
 }
 
-size_t OffsetVfsFile::GetSize() const {
+std::size_t OffsetVfsFile::GetSize() const {
     return size;
 }
 
-bool OffsetVfsFile::Resize(size_t new_size) {
+bool OffsetVfsFile::Resize(std::size_t new_size) {
     if (offset + new_size < file->GetSize()) {
         size = new_size;
     } else {
@@ -47,22 +49,22 @@ bool OffsetVfsFile::IsReadable() const {
     return file->IsReadable();
 }
 
-size_t OffsetVfsFile::Read(u8* data, size_t length, size_t r_offset) const {
+std::size_t OffsetVfsFile::Read(u8* data, std::size_t length, std::size_t r_offset) const {
     return file->Read(data, TrimToFit(length, r_offset), offset + r_offset);
 }
 
-size_t OffsetVfsFile::Write(const u8* data, size_t length, size_t r_offset) {
+std::size_t OffsetVfsFile::Write(const u8* data, std::size_t length, std::size_t r_offset) {
     return file->Write(data, TrimToFit(length, r_offset), offset + r_offset);
 }
 
-boost::optional<u8> OffsetVfsFile::ReadByte(size_t r_offset) const {
+boost::optional<u8> OffsetVfsFile::ReadByte(std::size_t r_offset) const {
     if (r_offset < size)
         return file->ReadByte(offset + r_offset);
 
     return boost::none;
 }
 
-std::vector<u8> OffsetVfsFile::ReadBytes(size_t r_size, size_t r_offset) const {
+std::vector<u8> OffsetVfsFile::ReadBytes(std::size_t r_size, std::size_t r_offset) const {
     return file->ReadBytes(TrimToFit(r_size, r_offset), offset + r_offset);
 }
 
@@ -70,14 +72,14 @@ std::vector<u8> OffsetVfsFile::ReadAllBytes() const {
     return file->ReadBytes(size, offset);
 }
 
-bool OffsetVfsFile::WriteByte(u8 data, size_t r_offset) {
+bool OffsetVfsFile::WriteByte(u8 data, std::size_t r_offset) {
     if (r_offset < size)
         return file->WriteByte(data, offset + r_offset);
 
     return false;
 }
 
-size_t OffsetVfsFile::WriteBytes(const std::vector<u8>& data, size_t r_offset) {
+std::size_t OffsetVfsFile::WriteBytes(const std::vector<u8>& data, std::size_t r_offset) {
     return file->Write(data.data(), TrimToFit(data.size(), r_offset), offset + r_offset);
 }
 
@@ -85,12 +87,12 @@ bool OffsetVfsFile::Rename(std::string_view name) {
     return file->Rename(name);
 }
 
-size_t OffsetVfsFile::GetOffset() const {
+std::size_t OffsetVfsFile::GetOffset() const {
     return offset;
 }
 
-size_t OffsetVfsFile::TrimToFit(size_t r_size, size_t r_offset) const {
-    return std::clamp(r_size, size_t{0}, size - r_offset);
+std::size_t OffsetVfsFile::TrimToFit(std::size_t r_size, std::size_t r_offset) const {
+    return r_offset >= size ? std::size_t{0} : std::min(r_size, size - r_offset); // no underflow
 }
 
 } // namespace FileSys
diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h
index cb92d1570..8062702a7 100644
--- a/src/core/file_sys/vfs_offset.h
+++ b/src/core/file_sys/vfs_offset.h
@@ -17,33 +17,34 @@ namespace FileSys {
 // the size of this wrapper.
 class OffsetVfsFile : public VfsFile {
 public:
-    OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0,
+    OffsetVfsFile(std::shared_ptr<VfsFile> file, std::size_t size, std::size_t offset = 0,
                   std::string new_name = "", VirtualDir new_parent = nullptr);
+    ~OffsetVfsFile() override;
 
     std::string GetName() const override;
-    size_t GetSize() const override;
-    bool Resize(size_t new_size) override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
     std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
     bool IsWritable() const override;
     bool IsReadable() const override;
-    size_t Read(u8* data, size_t length, size_t offset) const override;
-    size_t Write(const u8* data, size_t length, size_t offset) override;
-    boost::optional<u8> ReadByte(size_t offset) const override;
-    std::vector<u8> ReadBytes(size_t size, size_t offset) const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
+    boost::optional<u8> ReadByte(std::size_t offset) const override;
+    std::vector<u8> ReadBytes(std::size_t size, std::size_t offset) const override;
     std::vector<u8> ReadAllBytes() const override;
-    bool WriteByte(u8 data, size_t offset) override;
-    size_t WriteBytes(const std::vector<u8>& data, size_t offset) override;
+    bool WriteByte(u8 data, std::size_t offset) override;
+    std::size_t WriteBytes(const std::vector<u8>& data, std::size_t offset) override;
 
     bool Rename(std::string_view name) override;
 
-    size_t GetOffset() const;
+    std::size_t GetOffset() const;
 
 private:
-    size_t TrimToFit(size_t r_size, size_t r_offset) const;
+    std::size_t TrimToFit(std::size_t r_size, std::size_t r_offset) const;
 
     std::shared_ptr<VfsFile> file;
-    size_t offset;
-    size_t size;
+    std::size_t offset;
+    std::size_t size;
     std::string name;
     VirtualDir parent;
 };
diff --git a/src/core/file_sys/vfs_real.cpp b/src/core/file_sys/vfs_real.cpp
index 89b101145..9defad04c 100644
--- a/src/core/file_sys/vfs_real.cpp
+++ b/src/core/file_sys/vfs_real.cpp
@@ -227,11 +227,11 @@ std::string RealVfsFile::GetName() const {
     return path_components.back();
 }
 
-size_t RealVfsFile::GetSize() const {
+std::size_t RealVfsFile::GetSize() const {
     return backing->GetSize();
 }
 
-bool RealVfsFile::Resize(size_t new_size) {
+bool RealVfsFile::Resize(std::size_t new_size) {
     return backing->Resize(new_size);
 }
 
@@ -247,13 +247,13 @@ bool RealVfsFile::IsReadable() const {
     return (perms & Mode::ReadWrite) != 0;
 }
 
-size_t RealVfsFile::Read(u8* data, size_t length, size_t offset) const {
+std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const {
     if (!backing->Seek(offset, SEEK_SET))
         return 0;
     return backing->ReadBytes(data, length);
 }
 
-size_t RealVfsFile::Write(const u8* data, size_t length, size_t offset) {
+std::size_t RealVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) {
     if (!backing->Seek(offset, SEEK_SET))
         return 0;
     return backing->WriteBytes(data, length);
@@ -413,6 +413,23 @@ std::string RealVfsDirectory::GetFullPath() const {
     return out;
 }
 
+std::map<std::string, VfsEntryType, std::less<>> RealVfsDirectory::GetEntries() const {
+    if (perms == Mode::Append) // perms of exactly Mode::Append yield an empty listing
+        return {};
+
+    std::map<std::string, VfsEntryType, std::less<>> out; // entry name -> file/directory tag
+    FileUtil::ForeachDirectoryEntry(
+        nullptr, path, // NOTE(review): nullptr presumably means "no entry count wanted" - confirm
+        [&out](u64* entries_out, const std::string& directory, const std::string& filename) {
+            const std::string full_path = directory + DIR_SEP + filename; // path to classify
+            out.emplace(filename, FileUtil::IsDirectory(full_path) ? VfsEntryType::Directory
+                                                                   : VfsEntryType::File);
+            return true; // NOTE(review): presumably "keep enumerating" - confirm in FileUtil
+        });
+
+    return out;
+}
+
 bool RealVfsDirectory::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
     return false;
 }
diff --git a/src/core/file_sys/vfs_real.h b/src/core/file_sys/vfs_real.h
index 7db86691f..5b61db90d 100644
--- a/src/core/file_sys/vfs_real.h
+++ b/src/core/file_sys/vfs_real.h
@@ -48,13 +48,13 @@ public:
     ~RealVfsFile() override;
 
     std::string GetName() const override;
-    size_t GetSize() const override;
-    bool Resize(size_t new_size) override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
     std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
     bool IsWritable() const override;
     bool IsReadable() const override;
-    size_t Read(u8* data, size_t length, size_t offset) const override;
-    size_t Write(const u8* data, size_t length, size_t offset) override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
     bool Rename(std::string_view name) override;
 
 private:
@@ -98,6 +98,7 @@ public:
     bool DeleteFile(std::string_view name) override;
     bool Rename(std::string_view name) override;
     std::string GetFullPath() const override;
+    std::map<std::string, VfsEntryType, std::less<>> GetEntries() const override;
 
 protected:
     bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
diff --git a/src/core/file_sys/vfs_static.h b/src/core/file_sys/vfs_static.h
new file mode 100644
index 000000000..44fab51d1
--- /dev/null
+++ b/src/core/file_sys/vfs_static.h
@@ -0,0 +1,79 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <string_view>
+
+#include "core/file_sys/vfs.h"
+
+namespace FileSys {
+
+class StaticVfsFile : public VfsFile { // `size` bytes that all read as `value`; writes rejected
+public:
+    explicit StaticVfsFile(u8 value, std::size_t size = 0, std::string name = "",
+                           VirtualDir parent = nullptr)
+        : value{value}, size{size}, name{std::move(name)}, parent{std::move(parent)} {}
+
+    std::string GetName() const override {
+        return name;
+    }
+
+    std::size_t GetSize() const override {
+        return size;
+    }
+
+    bool Resize(std::size_t new_size) override {
+        size = new_size; // only the reported length changes; no storage is involved
+        return true;
+    }
+
+    std::shared_ptr<VfsDirectory> GetContainingDirectory() const override {
+        return parent;
+    }
+
+    bool IsWritable() const override {
+        return false;
+    }
+
+    bool IsReadable() const override {
+        return true;
+    }
+
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override {
+        const auto read = std::min(length, size - std::min(offset, size)); // 0 past the end
+        std::fill(data, data + read, value);
+        return read;
+    }
+
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override {
+        return 0; // nothing is ever stored, so zero bytes are reported as written
+    }
+
+    boost::optional<u8> ReadByte(std::size_t offset) const override {
+        if (offset < size)
+            return value;
+        return boost::none;
+    }
+
+    std::vector<u8> ReadBytes(std::size_t length, std::size_t offset) const override {
+        const auto read = std::min(length, size - std::min(offset, size)); // 0 past the end
+        return std::vector<u8>(read, value);
+    }
+
+    bool Rename(std::string_view new_name) override {
+        name = new_name;
+        return true;
+    }
+
+private:
+    u8 value; // the byte every successful read returns
+    std::size_t size; // reported file length in bytes
+    std::string name;
+    VirtualDir parent;
+};
+
+} // namespace FileSys
diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp
index 98e7c4598..389c7e003 100644
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -3,16 +3,72 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <cstring>
 #include <utility>
 #include "core/file_sys/vfs_vector.h"
 
 namespace FileSys {
+VectorVfsFile::VectorVfsFile(std::vector<u8> initial_data, std::string name, VirtualDir parent)
+    : data(std::move(initial_data)), parent(std::move(parent)), name(std::move(name)) {}
+
+VectorVfsFile::~VectorVfsFile() = default;
+
+std::string VectorVfsFile::GetName() const {
+    return name;
+}
+
+std::size_t VectorVfsFile::GetSize() const {
+    return data.size(); // the file is exactly as large as its backing vector
+}
+
+bool VectorVfsFile::Resize(std::size_t new_size) {
+    data.resize(new_size); // std::vector::resize zero-fills any newly added bytes
+    return true;
+}
+
+std::shared_ptr<VfsDirectory> VectorVfsFile::GetContainingDirectory() const {
+    return parent;
+}
+
+bool VectorVfsFile::IsWritable() const {
+    return true;
+}
+
+bool VectorVfsFile::IsReadable() const {
+    return true;
+}
+
+std::size_t VectorVfsFile::Read(u8* data_, std::size_t length, std::size_t offset) const {
+    const auto read = std::min(length, data.size() - std::min(offset, data.size())); // no wrap
+    std::memcpy(data_, data.data() + std::min(offset, data.size()), read);
+    return read; // 0 when offset is at or beyond the end of the vector
+}
+
+std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_t offset) {
+    if (offset + length > data.size())
+        data.resize(offset + length); // grow the backing vector so the whole write fits
+    const auto write = std::min(length, data.size() - offset);
+    std::memcpy(data.data() + offset, data_, write); // land at `offset`, not the vector start
+    return write;
+}
+
+bool VectorVfsFile::Rename(std::string_view name_) {
+    name = name_;
+    return true;
+}
+
+void VectorVfsFile::Assign(std::vector<u8> new_data) {
+    data = std::move(new_data);
+}
+
 VectorVfsDirectory::VectorVfsDirectory(std::vector<VirtualFile> files_,
                                        std::vector<VirtualDir> dirs_, std::string name_,
                                        VirtualDir parent_)
     : files(std::move(files_)), dirs(std::move(dirs_)), parent(std::move(parent_)),
       name(std::move(name_)) {}
 
+VectorVfsDirectory::~VectorVfsDirectory() = default;
+
 std::vector<std::shared_ptr<VfsFile>> VectorVfsDirectory::GetFiles() const {
     return files;
 }
diff --git a/src/core/file_sys/vfs_vector.h b/src/core/file_sys/vfs_vector.h
index 179f62e4b..48a414c98 100644
--- a/src/core/file_sys/vfs_vector.h
+++ b/src/core/file_sys/vfs_vector.h
@@ -8,6 +8,31 @@
 
 namespace FileSys {
 
+// An implementation of VfsFile that is backed by a vector optionally supplied upon construction
+class VectorVfsFile : public VfsFile {
+public:
+    explicit VectorVfsFile(std::vector<u8> initial_data = {}, std::string name = "",
+                           VirtualDir parent = nullptr);
+    ~VectorVfsFile() override;
+
+    std::string GetName() const override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
+    std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
+    bool IsWritable() const override;
+    bool IsReadable() const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
+    bool Rename(std::string_view name) override;
+
+    virtual void Assign(std::vector<u8> new_data); // swaps in a new backing vector
+
+private:
+    std::vector<u8> data; // file contents; the vector's size is the file size
+    VirtualDir parent;
+    std::string name;
+};
+
 // An implementation of VfsDirectory that maintains two vectors for subdirectories and files.
 // Vector data is supplied upon construction.
 class VectorVfsDirectory : public VfsDirectory {
@@ -15,6 +40,7 @@ public:
     explicit VectorVfsDirectory(std::vector<VirtualFile> files = {},
                                 std::vector<VirtualDir> dirs = {}, std::string name = "",
                                 VirtualDir parent = nullptr);
+    ~VectorVfsDirectory() override;
 
     std::vector<std::shared_ptr<VfsFile>> GetFiles() const override;
     std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override;
diff --git a/src/core/file_sys/xts_archive.cpp b/src/core/file_sys/xts_archive.cpp
index 4dbc25c55..b2b164368 100644
--- a/src/core/file_sys/xts_archive.cpp
+++ b/src/core/file_sys/xts_archive.cpp
@@ -25,14 +25,11 @@ namespace FileSys {
 constexpr u64 NAX_HEADER_PADDING_SIZE = 0x4000;
 
 template <typename SourceData, typename SourceKey, typename Destination>
-static bool CalculateHMAC256(Destination* out, const SourceKey* key, size_t key_length,
-                             const SourceData* data, size_t data_length) {
+static bool CalculateHMAC256(Destination* out, const SourceKey* key, std::size_t key_length,
+                             const SourceData* data, std::size_t data_length) {
     mbedtls_md_context_t context;
     mbedtls_md_init(&context);
 
-    const auto key_f = reinterpret_cast<const u8*>(key);
-    const std::vector<u8> key_v(key_f, key_f + key_length);
-
     if (mbedtls_md_setup(&context, mbedtls_md_info_from_type(MBEDTLS_MD_SHA256), 1) ||
         mbedtls_md_hmac_starts(&context, reinterpret_cast<const u8*>(key), key_length) ||
         mbedtls_md_hmac_update(&context, reinterpret_cast<const u8*>(data), data_length) ||
@@ -45,7 +42,7 @@ static bool CalculateHMAC256(Destination* out, const SourceKey* key, size_t key_
     return true;
 }
 
-NAX::NAX(VirtualFile file_) : file(std::move(file_)), header(std::make_unique<NAXHeader>()) {
+NAX::NAX(VirtualFile file_) : header(std::make_unique<NAXHeader>()), file(std::move(file_)) {
     std::string path = FileUtil::SanitizePath(file->GetFullPath());
     static const std::regex nax_path_regex("/registered/(000000[0-9A-F]{2})/([0-9A-F]{32})\\.nca",
                                            std::regex_constants::ECMAScript |
@@ -65,13 +62,15 @@ NAX::NAX(VirtualFile file_) : file(std::move(file_)), header(std::make_unique<NA
 }
 
 NAX::NAX(VirtualFile file_, std::array<u8, 0x10> nca_id)
-    : file(std::move(file_)), header(std::make_unique<NAXHeader>()) {
+    : header(std::make_unique<NAXHeader>()), file(std::move(file_)) {
     Core::Crypto::SHA256Hash hash{};
     mbedtls_sha256(nca_id.data(), nca_id.size(), hash.data(), 0);
     status = Parse(fmt::format("/registered/000000{:02X}/{}.nca", hash[0],
                                Common::HexArrayToString(nca_id, false)));
 }
 
+NAX::~NAX() = default;
+
 Loader::ResultStatus NAX::Parse(std::string_view path) {
     if (file->ReadObject(header.get()) != sizeof(NAXHeader))
         return Loader::ResultStatus::ErrorBadNAXHeader;
@@ -91,7 +90,7 @@ Loader::ResultStatus NAX::Parse(std::string_view path) {
 
     const auto enc_keys = header->key_area;
 
-    size_t i = 0;
+    std::size_t i = 0;
     for (; i < sd_keys.size(); ++i) {
         std::array<Core::Crypto::Key128, 2> nax_keys{};
         if (!CalculateHMAC256(nax_keys.data(), sd_keys[i].data(), 0x10, std::string(path).c_str(),
@@ -99,7 +98,7 @@ Loader::ResultStatus NAX::Parse(std::string_view path) {
             return Loader::ResultStatus::ErrorNAXKeyHMACFailed;
         }
 
-        for (size_t j = 0; j < nax_keys.size(); ++j) {
+        for (std::size_t j = 0; j < nax_keys.size(); ++j) {
             Core::Crypto::AESCipher<Core::Crypto::Key128> cipher(nax_keys[j],
                                                                  Core::Crypto::Mode::ECB);
             cipher.Transcode(enc_keys[j].data(), 0x10, header->key_area[j].data(),
@@ -138,9 +137,9 @@ VirtualFile NAX::GetDecrypted() const {
     return dec_file;
 }
 
-std::shared_ptr<NCA> NAX::AsNCA() const {
+std::unique_ptr<NCA> NAX::AsNCA() const {
     if (type == NAXContentType::NCA)
-        return std::make_shared<NCA>(GetDecrypted());
+        return std::make_unique<NCA>(GetDecrypted());
     return nullptr;
 }
 
diff --git a/src/core/file_sys/xts_archive.h b/src/core/file_sys/xts_archive.h
index 55d2154a6..8fedd8585 100644
--- a/src/core/file_sys/xts_archive.h
+++ b/src/core/file_sys/xts_archive.h
@@ -33,12 +33,13 @@ class NAX : public ReadOnlyVfsDirectory {
 public:
     explicit NAX(VirtualFile file);
     explicit NAX(VirtualFile file, std::array<u8, 0x10> nca_id);
+    ~NAX() override;
 
     Loader::ResultStatus GetStatus() const;
 
     VirtualFile GetDecrypted() const;
 
-    std::shared_ptr<NCA> AsNCA() const;
+    std::unique_ptr<NCA> AsNCA() const;
 
     NAXContentType GetContentType() const;
 
@@ -60,7 +61,7 @@ private:
 
     VirtualFile file;
     Loader::ResultStatus status;
-    NAXContentType type;
+    NAXContentType type{};
 
     VirtualFile dec_file;
 
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 332e5c3d0..5bc947010 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -37,7 +37,9 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/process.h"
 #include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/vm_manager.h"
 #include "core/loader/loader.h"
 #include "core/memory.h"
 
@@ -65,9 +67,9 @@ constexpr u32 MSG_WAITALL = 8;
 constexpr u32 LR_REGISTER = 30;
 constexpr u32 SP_REGISTER = 31;
 constexpr u32 PC_REGISTER = 32;
-constexpr u32 CPSR_REGISTER = 33;
+constexpr u32 PSTATE_REGISTER = 33;
 constexpr u32 UC_ARM64_REG_Q0 = 34;
-constexpr u32 FPSCR_REGISTER = 66;
+constexpr u32 FPCR_REGISTER = 66;
 
 // TODO/WiP - Used while working on support for FPU
 constexpr u32 TODO_DUMMY_REG_997 = 997;
@@ -116,7 +118,7 @@ constexpr char target_xml[] =
 
     <reg name="pc" bitsize="64" type="code_ptr"/>
 
-    <flags id="cpsr_flags" size="4">
+    <flags id="pstate_flags" size="4">
       <field name="SP" start="0" end="0"/>
       <field name="" start="1" end="1"/>
       <field name="EL" start="2" end="3"/>
@@ -135,7 +137,7 @@ constexpr char target_xml[] =
       <field name="Z" start="30" end="30"/>
       <field name="N" start="31" end="31"/>
     </flags>
-    <reg name="cpsr" bitsize="32" type="cpsr_flags"/>
+    <reg name="pstate" bitsize="32" type="pstate_flags"/>
   </feature>
   <feature name="org.gnu.gdb.aarch64.fpu">
   </feature>
@@ -227,10 +229,10 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) {
         return thread->context.sp;
     } else if (id == PC_REGISTER) {
         return thread->context.pc;
-    } else if (id == CPSR_REGISTER) {
-        return thread->context.cpsr;
-    } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) {
-        return thread->context.fpu_registers[id - UC_ARM64_REG_Q0][0];
+    } else if (id == PSTATE_REGISTER) {
+        return thread->context.pstate;
+    } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) {
+        return thread->context.vector_registers[id - UC_ARM64_REG_Q0][0];
     } else {
         return 0;
     }
@@ -247,10 +249,10 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr)
         thread->context.sp = val;
     } else if (id == PC_REGISTER) {
         thread->context.pc = val;
-    } else if (id == CPSR_REGISTER) {
-        thread->context.cpsr = val;
-    } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) {
-        thread->context.fpu_registers[id - (CPSR_REGISTER + 1)][0] = val;
+    } else if (id == PSTATE_REGISTER) {
+        thread->context.pstate = static_cast<u32>(val);
+    } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) {
+        thread->context.vector_registers[id - (PSTATE_REGISTER + 1)][0] = val;
     }
 }
 
@@ -292,7 +294,7 @@ static u8 NibbleToHex(u8 n) {
  * @param src Pointer to array of output hex string characters.
  * @param len Length of src array.
  */
-static u32 HexToInt(const u8* src, size_t len) {
+static u32 HexToInt(const u8* src, std::size_t len) {
     u32 output = 0;
     while (len-- > 0) {
         output = (output << 4) | HexCharToValue(src[0]);
@@ -307,7 +309,7 @@ static u32 HexToInt(const u8* src, size_t len) {
  * @param src Pointer to array of output hex string characters.
  * @param len Length of src array.
  */
-static u64 HexToLong(const u8* src, size_t len) {
+static u64 HexToLong(const u8* src, std::size_t len) {
     u64 output = 0;
     while (len-- > 0) {
         output = (output << 4) | HexCharToValue(src[0]);
@@ -323,7 +325,7 @@ static u64 HexToLong(const u8* src, size_t len) {
  * @param src Pointer to array of u8 bytes.
  * @param len Length of src array.
  */
-static void MemToGdbHex(u8* dest, const u8* src, size_t len) {
+static void MemToGdbHex(u8* dest, const u8* src, std::size_t len) {
     while (len-- > 0) {
         u8 tmp = *src++;
         *dest++ = NibbleToHex(tmp >> 4);
@@ -338,7 +340,7 @@ static void MemToGdbHex(u8* dest, const u8* src, size_t len) {
  * @param src Pointer to array of output hex string characters.
  * @param len Length of src array.
  */
-static void GdbHexToMem(u8* dest, const u8* src, size_t len) {
+static void GdbHexToMem(u8* dest, const u8* src, std::size_t len) {
     while (len-- > 0) {
         *dest++ = (HexCharToValue(src[0]) << 4) | HexCharToValue(src[1]);
         src += 2;
@@ -406,7 +408,7 @@ static u64 GdbHexToLong(const u8* src) {
 /// Read a byte from the gdb client.
 static u8 ReadByte() {
     u8 c;
-    size_t received_size = recv(gdbserver_socket, reinterpret_cast<char*>(&c), 1, MSG_WAITALL);
+    std::size_t received_size = recv(gdbserver_socket, reinterpret_cast<char*>(&c), 1, MSG_WAITALL);
     if (received_size != 1) {
         LOG_ERROR(Debug_GDBStub, "recv failed: {}", received_size);
         Shutdown();
@@ -416,7 +418,7 @@ static u8 ReadByte() {
 }
 
 /// Calculate the checksum of the current command buffer.
-static u8 CalculateChecksum(const u8* buffer, size_t length) {
+static u8 CalculateChecksum(const u8* buffer, std::size_t length) {
     return static_cast<u8>(std::accumulate(buffer, buffer + length, 0, std::plus<u8>()));
 }
 
@@ -518,7 +520,7 @@ bool CheckBreakpoint(VAddr addr, BreakpointType type) {
  * @param packet Packet to be sent to client.
  */
 static void SendPacket(const char packet) {
-    size_t sent_size = send(gdbserver_socket, &packet, 1, 0);
+    std::size_t sent_size = send(gdbserver_socket, &packet, 1, 0);
     if (sent_size != 1) {
         LOG_ERROR(Debug_GDBStub, "send failed");
     }
@@ -585,7 +587,8 @@ static void HandleQuery() {
                        strlen("Xfer:features:read:target.xml:")) == 0) {
         SendReply(target_xml);
     } else if (strncmp(query, "Offsets", strlen("Offsets")) == 0) {
-        std::string buffer = fmt::format("TextSeg={:0x}", Memory::PROCESS_IMAGE_VADDR);
+        const VAddr base_address = Core::CurrentProcess()->VMManager().GetCodeRegionBaseAddress();
+        std::string buffer = fmt::format("TextSeg={:0x}", base_address);
         SendReply(buffer.c_str());
     } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
         std::string val = "m";
@@ -781,11 +784,11 @@ static void ReadRegister() {
         LongToGdbHex(reply, RegRead(id, current_thread));
     } else if (id == PC_REGISTER) {
         LongToGdbHex(reply, RegRead(id, current_thread));
-    } else if (id == CPSR_REGISTER) {
-        IntToGdbHex(reply, (u32)RegRead(id, current_thread));
-    } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) {
+    } else if (id == PSTATE_REGISTER) {
+        IntToGdbHex(reply, static_cast<u32>(RegRead(id, current_thread)));
+    } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
         LongToGdbHex(reply, RegRead(id, current_thread));
-    } else if (id == FPSCR_REGISTER) {
+    } else if (id == FPCR_REGISTER) {
         LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread));
     } else {
         LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread));
@@ -811,7 +814,7 @@ static void ReadRegisters() {
 
     bufptr += 16;
 
-    IntToGdbHex(bufptr, (u32)RegRead(CPSR_REGISTER, current_thread));
+    IntToGdbHex(bufptr, static_cast<u32>(RegRead(PSTATE_REGISTER, current_thread)));
 
     bufptr += 8;
 
@@ -843,11 +846,11 @@ static void WriteRegister() {
         RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
     } else if (id == PC_REGISTER) {
         RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
-    } else if (id == CPSR_REGISTER) {
+    } else if (id == PSTATE_REGISTER) {
         RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
-    } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) {
+    } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
         RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
-    } else if (id == FPSCR_REGISTER) {
+    } else if (id == FPCR_REGISTER) {
         RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread);
     } else {
         RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread);
@@ -866,16 +869,16 @@ static void WriteRegisters() {
     if (command_buffer[0] != 'G')
         return SendReply("E01");
 
-    for (u32 i = 0, reg = 0; reg <= FPSCR_REGISTER; i++, reg++) {
+    for (u32 i = 0, reg = 0; reg <= FPCR_REGISTER; i++, reg++) {
         if (reg <= SP_REGISTER) {
             RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
         } else if (reg == PC_REGISTER) {
             RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
-        } else if (reg == CPSR_REGISTER) {
-            RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
-        } else if (reg >= UC_ARM64_REG_Q0 && reg < FPSCR_REGISTER) {
+        } else if (reg == PSTATE_REGISTER) {
+            RegWrite(PSTATE_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
+        } else if (reg >= UC_ARM64_REG_Q0 && reg < FPCR_REGISTER) {
             RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
-        } else if (reg == FPSCR_REGISTER) {
+        } else if (reg == FPCR_REGISTER) {
             RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread);
         } else {
             UNIMPLEMENTED();
@@ -893,11 +896,11 @@ static void ReadMemory() {
     static u8 reply[GDB_BUFFER_SIZE - 4];
 
     auto start_offset = command_buffer + 1;
-    auto addr_pos = std::find(start_offset, command_buffer + command_length, ',');
-    VAddr addr = HexToLong(start_offset, static_cast<u64>(addr_pos - start_offset));
+    const auto addr_pos = std::find(start_offset, command_buffer + command_length, ',');
+    const VAddr addr = HexToLong(start_offset, static_cast<u64>(addr_pos - start_offset));
 
     start_offset = addr_pos + 1;
-    u64 len =
+    const u64 len =
         HexToLong(start_offset, static_cast<u64>((command_buffer + command_length) - start_offset));
 
     LOG_DEBUG(Debug_GDBStub, "gdb: addr: {:016X} len: {:016X}", addr, len);
@@ -906,7 +909,9 @@ static void ReadMemory() {
         SendReply("E01");
     }
 
-    if (addr < Memory::PROCESS_IMAGE_VADDR || addr >= Memory::MAP_REGION_VADDR_END) {
+    const auto& vm_manager = Core::CurrentProcess()->VMManager();
+    if (addr < vm_manager.GetCodeRegionBaseAddress() ||
+        addr >= vm_manager.GetMapRegionEndAddress()) {
         return SendReply("E00");
     }
 
@@ -995,7 +1000,7 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
     breakpoint.addr = addr;
     breakpoint.len = len;
     Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
-    static constexpr std::array<u8, 4> btrap{{0xd4, 0x20, 0x7d, 0x0}};
+    static constexpr std::array<u8, 4> btrap{{0x00, 0x7d, 0x20, 0xd4}};
     Memory::WriteBlock(addr, btrap.data(), btrap.size());
     Core::System::GetInstance().InvalidateCpuInstructionCaches();
     p.insert({addr, breakpoint});
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index 545cd884a..419f45896 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -12,7 +12,7 @@
 namespace IPC {
 
 /// Size of the command buffer area, in 32-bit words.
-constexpr size_t COMMAND_BUFFER_LENGTH = 0x100 / sizeof(u32);
+constexpr std::size_t COMMAND_BUFFER_LENGTH = 0x100 / sizeof(u32);
 
 // These errors are commonly returned by invalid IPC translations, so alias them here for
 // convenience.
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 0f3ffdb60..a4bfe2eb0 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -152,8 +152,8 @@ public:
     }
 
     void ValidateHeader() {
-        const size_t num_domain_objects = context->NumDomainObjects();
-        const size_t num_move_objects = context->NumMoveObjects();
+        const std::size_t num_domain_objects = context->NumDomainObjects();
+        const std::size_t num_move_objects = context->NumMoveObjects();
         ASSERT_MSG(!num_domain_objects || !num_move_objects,
                    "cannot move normal handles and domain objects");
         ASSERT_MSG((index - datapayload_index) == normal_params_size,
@@ -290,13 +290,6 @@ public:
         Skip(CommandIdSize, false);
     }
 
-    ResponseBuilder MakeBuilder(u32 normal_params_size, u32 num_handles_to_copy,
-                                u32 num_handles_to_move,
-                                ResponseBuilder::Flags flags = ResponseBuilder::Flags::None) const {
-        return ResponseBuilder{*context, normal_params_size, num_handles_to_copy,
-                               num_handles_to_move, flags};
-    }
-
     template <typename T>
     T Pop();
 
@@ -329,10 +322,10 @@ public:
     T PopRaw();
 
     template <typename T>
-    Kernel::SharedPtr<T> GetMoveObject(size_t index);
+    Kernel::SharedPtr<T> GetMoveObject(std::size_t index);
 
     template <typename T>
-    Kernel::SharedPtr<T> GetCopyObject(size_t index);
+    Kernel::SharedPtr<T> GetCopyObject(std::size_t index);
 
     template <class T>
     std::shared_ptr<T> PopIpcInterface() {
@@ -406,12 +399,12 @@ void RequestParser::Pop(First& first_value, Other&... other_values) {
 }
 
 template <typename T>
-Kernel::SharedPtr<T> RequestParser::GetMoveObject(size_t index) {
+Kernel::SharedPtr<T> RequestParser::GetMoveObject(std::size_t index) {
     return context->GetMoveObject<T>(index);
 }
 
 template <typename T>
-Kernel::SharedPtr<T> RequestParser::GetCopyObject(size_t index) {
+Kernel::SharedPtr<T> RequestParser::GetCopyObject(std::size_t index) {
     return context->GetCopyObject<T>(index);
 }
 
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 6657accd5..93577591f 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -35,16 +35,17 @@ static ResultCode WaitForAddress(VAddr address, s64 timeout) {
 
 // Gets the threads waiting on an address.
 static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
-    const auto RetrieveWaitingThreads =
-        [](size_t core_index, std::vector<SharedPtr<Thread>>& waiting_threads, VAddr arb_addr) {
-            const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
-            auto& thread_list = scheduler->GetThreadList();
-
-            for (auto& thread : thread_list) {
-                if (thread->arb_wait_address == arb_addr)
-                    waiting_threads.push_back(thread);
-            }
-        };
+    const auto RetrieveWaitingThreads = [](std::size_t core_index,
+                                           std::vector<SharedPtr<Thread>>& waiting_threads,
+                                           VAddr arb_addr) {
+        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+        auto& thread_list = scheduler->GetThreadList();
+
+        for (auto& thread : thread_list) {
+            if (thread->arb_wait_address == arb_addr)
+                waiting_threads.push_back(thread);
+        }
+    };
 
     // Retrieve all threads that are waiting for this address.
     std::vector<SharedPtr<Thread>> threads;
@@ -66,12 +67,12 @@ static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address)
 static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
     // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
     // them all.
-    size_t last = waiting_threads.size();
+    std::size_t last = waiting_threads.size();
     if (num_to_wake > 0)
         last = num_to_wake;
 
     // Signal the waiting threads.
-    for (size_t i = 0; i < last; i++) {
+    for (std::size_t i = 0; i < last; i++) {
         ASSERT(waiting_threads[i]->status == ThreadStatus::WaitArb);
         waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
         waiting_threads[i]->arb_wait_address = 0;
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index ad39c8271..e5fa67ae8 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -17,6 +17,7 @@ enum {
 
     // Confirmed Switch OS error codes
     MaxConnectionsReached = 7,
+    InvalidSize = 101,
     InvalidAddress = 102,
     HandleTableFull = 105,
     InvalidMemoryState = 106,
@@ -29,6 +30,8 @@ enum {
     SynchronizationCanceled = 118,
     TooLarge = 119,
     InvalidEnumValue = 120,
+    NoSuchEntry = 121,
+    AlreadyRegistered = 122,
     InvalidState = 125,
     ResourceLimitExceeded = 132,
 };
@@ -55,6 +58,8 @@ constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel,
                                                     ErrCodes::InvalidMemoryPermissions);
 constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
 constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId);
+constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize);
+constexpr ResultCode ERR_ALREADY_REGISTERED(ErrorModule::Kernel, ErrCodes::AlreadyRegistered);
 constexpr ResultCode ERR_INVALID_STATE(ErrorModule::Kernel, ErrCodes::InvalidState);
 constexpr ResultCode ERR_INVALID_THREAD_PRIORITY(ErrorModule::Kernel,
                                                  ErrCodes::InvalidThreadPriority);
@@ -63,7 +68,7 @@ constexpr ResultCode ERR_INVALID_OBJECT_ADDR(-1);
 constexpr ResultCode ERR_NOT_AUTHORIZED(-1);
 /// Alternate code returned instead of ERR_INVALID_HANDLE in some code paths.
 constexpr ResultCode ERR_INVALID_HANDLE_OS(-1);
-constexpr ResultCode ERR_NOT_FOUND(-1);
+constexpr ResultCode ERR_NOT_FOUND(ErrorModule::Kernel, ErrCodes::NoSuchEntry);
 constexpr ResultCode RESULT_TIMEOUT(ErrorModule::Kernel, ErrCodes::Timeout);
 /// Returned when Accept() is called on a port with no sessions to be accepted.
 constexpr ResultCode ERR_NO_PENDING_SESSIONS(-1);
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 3a079b9a9..5ee5c05e3 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -65,7 +65,7 @@ ResultCode HandleTable::Close(Handle handle) {
 }
 
 bool HandleTable::IsValid(Handle handle) const {
-    size_t slot = GetSlot(handle);
+    std::size_t slot = GetSlot(handle);
     u16 generation = GetGeneration(handle);
 
     return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index cac928adb..9e2f33e8a 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -93,7 +93,7 @@ private:
      * This is the maximum limit of handles allowed per process in CTR-OS. It can be further
      * reduced by ExHeader values, but this is not emulated here.
      */
-    static const size_t MAX_COUNT = 4096;
+    static const std::size_t MAX_COUNT = 4096;
 
     static u16 GetSlot(Handle handle) {
         return handle >> 15;
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 7264be906..72fb9d250 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -42,9 +42,9 @@ SharedPtr<Event> HLERequestContext::SleepClientThread(SharedPtr<Thread> thread,
                                                       Kernel::SharedPtr<Kernel::Event> event) {
 
     // Put the client thread to sleep until the wait event is signaled or the timeout expires.
-    thread->wakeup_callback =
-        [context = *this, callback](ThreadWakeupReason reason, SharedPtr<Thread> thread,
-                                    SharedPtr<WaitObject> object, size_t index) mutable -> bool {
+    thread->wakeup_callback = [context = *this, callback](
+                                  ThreadWakeupReason reason, SharedPtr<Thread> thread,
+                                  SharedPtr<WaitObject> object, std::size_t index) mutable -> bool {
         ASSERT(thread->status == ThreadStatus::WaitHLEEvent);
         callback(thread, context, reason);
         context.WriteToOutgoingCommandBuffer(*thread);
@@ -199,8 +199,8 @@ ResultCode HLERequestContext::PopulateFromIncomingCommandBuffer(u32_le* src_cmdb
     }
 
     // The data_size already includes the payload header, the padding and the domain header.
-    size_t size = data_payload_offset + command_header->data_size -
-                  sizeof(IPC::DataPayloadHeader) / sizeof(u32) - 4;
+    std::size_t size = data_payload_offset + command_header->data_size -
+                       sizeof(IPC::DataPayloadHeader) / sizeof(u32) - 4;
     if (domain_message_header)
         size -= sizeof(IPC::DomainMessageHeader) / sizeof(u32);
     std::copy_n(src_cmdbuf, size, cmd_buf.begin());
@@ -217,8 +217,8 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(const Thread& thread)
     ParseCommandBuffer(cmd_buf.data(), false);
 
     // The data_size already includes the payload header, the padding and the domain header.
-    size_t size = data_payload_offset + command_header->data_size -
-                  sizeof(IPC::DataPayloadHeader) / sizeof(u32) - 4;
+    std::size_t size = data_payload_offset + command_header->data_size -
+                       sizeof(IPC::DataPayloadHeader) / sizeof(u32) - 4;
     if (domain_message_header)
         size -= sizeof(IPC::DomainMessageHeader) / sizeof(u32);
 
@@ -229,7 +229,7 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(const Thread& thread)
                    "Handle descriptor bit set but no handles to translate");
         // We write the translated handles at a specific offset in the command buffer, this space
         // was already reserved when writing the header.
-        size_t current_offset =
+        std::size_t current_offset =
             (sizeof(IPC::CommandHeader) + sizeof(IPC::HandleDescriptorHeader)) / sizeof(u32);
         ASSERT_MSG(!handle_descriptor_header->send_current_pid, "Sending PID is not implemented");
 
@@ -258,7 +258,7 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(const Thread& thread)
         ASSERT(domain_message_header->num_objects == domain_objects.size());
         // Write the domain objects to the command buffer, these go after the raw untranslated data.
         // TODO(Subv): This completely ignores C buffers.
-        size_t domain_offset = size - domain_message_header->num_objects;
+        std::size_t domain_offset = size - domain_message_header->num_objects;
         auto& request_handlers = server_session->domain_request_handlers;
 
         for (auto& object : domain_objects) {
@@ -291,14 +291,15 @@ std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
     return buffer;
 }
 
-size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffer_index) const {
+std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
+                                           int buffer_index) const {
     if (size == 0) {
         LOG_WARNING(Core, "skip empty buffer write");
         return 0;
     }
 
     const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
-    const size_t buffer_size{GetWriteBufferSize(buffer_index)};
+    const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
     if (size > buffer_size) {
         LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
                      buffer_size);
@@ -314,13 +315,13 @@ size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffe
     return size;
 }
 
-size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
+std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
     const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
     return is_buffer_a ? BufferDescriptorA()[buffer_index].Size()
                        : BufferDescriptorX()[buffer_index].Size();
 }
 
-size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const {
+std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const {
     const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
     return is_buffer_b ? BufferDescriptorB()[buffer_index].Size()
                        : BufferDescriptorC()[buffer_index].Size();
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index f0d07f1b6..894479ee0 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -170,7 +170,7 @@ public:
     std::vector<u8> ReadBuffer(int buffer_index = 0) const;
 
     /// Helper function to write a buffer using the appropriate buffer descriptor
-    size_t WriteBuffer(const void* buffer, size_t size, int buffer_index = 0) const;
+    std::size_t WriteBuffer(const void* buffer, std::size_t size, int buffer_index = 0) const;
 
     /* Helper function to write a buffer using the appropriate buffer descriptor
      *
@@ -182,7 +182,7 @@ public:
      */
     template <typename ContiguousContainer,
               typename = std::enable_if_t<!std::is_pointer_v<ContiguousContainer>>>
-    size_t WriteBuffer(const ContiguousContainer& container, int buffer_index = 0) const {
+    std::size_t WriteBuffer(const ContiguousContainer& container, int buffer_index = 0) const {
         using ContiguousType = typename ContiguousContainer::value_type;
 
         static_assert(std::is_trivially_copyable_v<ContiguousType>,
@@ -193,19 +193,19 @@ public:
     }
 
     /// Helper function to get the size of the input buffer
-    size_t GetReadBufferSize(int buffer_index = 0) const;
+    std::size_t GetReadBufferSize(int buffer_index = 0) const;
 
     /// Helper function to get the size of the output buffer
-    size_t GetWriteBufferSize(int buffer_index = 0) const;
+    std::size_t GetWriteBufferSize(int buffer_index = 0) const;
 
     template <typename T>
-    SharedPtr<T> GetCopyObject(size_t index) {
+    SharedPtr<T> GetCopyObject(std::size_t index) {
         ASSERT(index < copy_objects.size());
         return DynamicObjectCast<T>(copy_objects[index]);
     }
 
     template <typename T>
-    SharedPtr<T> GetMoveObject(size_t index) {
+    SharedPtr<T> GetMoveObject(std::size_t index) {
         ASSERT(index < move_objects.size());
         return DynamicObjectCast<T>(move_objects[index]);
     }
@@ -223,7 +223,7 @@ public:
     }
 
     template <typename T>
-    std::shared_ptr<T> GetDomainRequestHandler(size_t index) const {
+    std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
         return std::static_pointer_cast<T>(domain_request_handlers[index]);
     }
 
@@ -240,15 +240,15 @@ public:
         domain_objects.clear();
     }
 
-    size_t NumMoveObjects() const {
+    std::size_t NumMoveObjects() const {
         return move_objects.size();
     }
 
-    size_t NumCopyObjects() const {
+    std::size_t NumCopyObjects() const {
         return copy_objects.size();
     }
 
-    size_t NumDomainObjects() const {
+    std::size_t NumDomainObjects() const {
         return domain_objects.size();
     }
 
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 36bf0b677..81675eac5 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -16,6 +16,7 @@
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/result.h"
+#include "core/memory.h"
 
 namespace Kernel {
 
@@ -62,7 +63,7 @@ ResultCode Mutex::TryAcquire(HandleTable& handle_table, VAddr address, Handle ho
                              Handle requesting_thread_handle) {
     // The mutex address must be 4-byte aligned
     if ((address % sizeof(u32)) != 0) {
-        return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
+        return ERR_INVALID_ADDRESS;
     }
 
     SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
@@ -100,7 +101,7 @@ ResultCode Mutex::TryAcquire(HandleTable& handle_table, VAddr address, Handle ho
 ResultCode Mutex::Release(VAddr address) {
     // The mutex address must be 4-byte aligned
     if ((address % sizeof(u32)) != 0) {
-        return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
+        return ERR_INVALID_ADDRESS;
     }
 
     auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address);
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index b054cbf7d..9eb72315c 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -6,7 +6,6 @@
 
 #include <atomic>
 #include <string>
-#include <utility>
 
 #include <boost/smart_ptr/intrusive_ptr.hpp>
 
@@ -97,7 +96,7 @@ using SharedPtr = boost::intrusive_ptr<T>;
 template <typename T>
 inline SharedPtr<T> DynamicObjectCast(SharedPtr<Object> object) {
     if (object != nullptr && object->GetHandleType() == T::HANDLE_TYPE) {
-        return boost::static_pointer_cast<T>(std::move(object));
+        return boost::static_pointer_cast<T>(object);
     }
     return nullptr;
 }
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index b025e323f..dc9fc8470 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -7,10 +7,13 @@
 #include "common/assert.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
+#include "core/core.h"
+#include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/resource_limit.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
@@ -32,16 +35,24 @@ SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
     process->name = std::move(name);
     process->flags.raw = 0;
     process->flags.memory_region.Assign(MemoryRegion::APPLICATION);
+    process->resource_limit = kernel.ResourceLimitForCategory(ResourceLimitCategory::APPLICATION);
     process->status = ProcessStatus::Created;
     process->program_id = 0;
     process->process_id = kernel.CreateNewProcessID();
+    process->svc_access_mask.set();
 
     kernel.AppendNewProcess(process);
     return process;
 }
 
-void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
-    for (size_t i = 0; i < len; ++i) {
+void Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
+    program_id = metadata.GetTitleID();
+    is_64bit_process = metadata.Is64BitProgram();
+    vm_manager.Reset(metadata.GetAddressSpaceType());
+}
+
+void Process::ParseKernelCaps(const u32* kernel_caps, std::size_t len) {
+    for (std::size_t i = 0; i < len; ++i) {
         u32 descriptor = kernel_caps[i];
         u32 type = descriptor >> 20;
 
@@ -117,7 +128,7 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
     // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
     // of the user address space.
     vm_manager
-        .MapMemoryBlock(Memory::STACK_AREA_VADDR_END - stack_size,
+        .MapMemoryBlock(vm_manager.GetTLSIORegionEndAddress() - stack_size,
                         std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size,
                         MemoryState::Mapped)
         .Unwrap();
@@ -125,7 +136,92 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
     vm_manager.LogLayout();
     status = ProcessStatus::Running;
 
-    Kernel::SetupMainThread(kernel, entry_point, main_thread_priority, this);
+    Kernel::SetupMainThread(kernel, entry_point, main_thread_priority, *this);
+}
+
+void Process::PrepareForTermination() {
+    status = ProcessStatus::Exited;
+
+    const auto stop_threads = [this](const std::vector<SharedPtr<Thread>>& thread_list) {
+        for (auto& thread : thread_list) {
+            if (thread->owner_process != this)
+                continue;
+
+            if (thread == GetCurrentThread())
+                continue;
+
+            // TODO(Subv): When are the other running/ready threads terminated?
+            ASSERT_MSG(thread->status == ThreadStatus::WaitSynchAny ||
+                           thread->status == ThreadStatus::WaitSynchAll,
+                       "Exiting processes with non-waiting threads is currently unimplemented");
+
+            thread->Stop();
+        }
+    };
+
+    auto& system = Core::System::GetInstance();
+    stop_threads(system.Scheduler(0)->GetThreadList());
+    stop_threads(system.Scheduler(1)->GetThreadList());
+    stop_threads(system.Scheduler(2)->GetThreadList());
+    stop_threads(system.Scheduler(3)->GetThreadList());
+}
+
+/**
+ * Finds a free location for the TLS section of a thread.
+ * @param tls_slots The TLS page array of the thread's owner process.
+ * @returns A tuple of (page, slot, alloc_needed) where:
+ * page: The index of the first allocated TLS page that has free slots (0 if alloc_needed).
+ * slot: The index of the first free slot in the indicated page (0 if alloc_needed).
+ * alloc_needed: Whether a new TLS page must be allocated (all pages are full, or none exist).
+ */
+static std::tuple<std::size_t, std::size_t, bool> FindFreeThreadLocalSlot(
+    const std::vector<std::bitset<8>>& tls_slots) {
+    // Iterate over all the allocated pages, and try to find one where not all slots are used.
+    for (std::size_t page = 0; page < tls_slots.size(); ++page) {
+        const auto& page_tls_slots = tls_slots[page];
+        if (!page_tls_slots.all()) {
+            // We found a page with at least one free slot, find which slot it is
+            for (std::size_t slot = 0; slot < page_tls_slots.size(); ++slot) {
+                if (!page_tls_slots.test(slot)) {
+                    return std::make_tuple(page, slot, false);
+                }
+            }
+        }
+    }
+
+    return std::make_tuple(0, 0, true);
+}
+
+VAddr Process::MarkNextAvailableTLSSlotAsUsed(Thread& thread) {
+    auto [available_page, available_slot, needs_allocation] = FindFreeThreadLocalSlot(tls_slots);
+    const VAddr tls_begin = vm_manager.GetTLSIORegionBaseAddress();
+
+    if (needs_allocation) {
+        tls_slots.emplace_back(0); // The page is completely available at the start
+        available_page = tls_slots.size() - 1;
+        available_slot = 0; // Use the first slot in the new page
+
+        // Append one zero-filled page to the thread's TLS backing memory for this region.
+        auto& tls_memory = thread.GetTLSMemory();
+        tls_memory->insert(tls_memory->end(), Memory::PAGE_SIZE, 0);
+
+        vm_manager.RefreshMemoryBlockMappings(tls_memory.get());
+
+        vm_manager.MapMemoryBlock(tls_begin + available_page * Memory::PAGE_SIZE, tls_memory, 0,
+                                  Memory::PAGE_SIZE, MemoryState::ThreadLocal);
+    }
+
+    tls_slots[available_page].set(available_slot);
+
+    return tls_begin + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
+}
+
+void Process::FreeTLSSlot(VAddr tls_address) {
+    const VAddr tls_base = tls_address - vm_manager.GetTLSIORegionBaseAddress();
+    const VAddr tls_page = tls_base / Memory::PAGE_SIZE;
+    const VAddr tls_slot = (tls_base % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
+
+    tls_slots[tls_page].reset(tls_slot);
 }
 
 void Process::LoadModule(SharedPtr<CodeSet> module_, VAddr base_addr) {
@@ -145,8 +241,8 @@ void Process::LoadModule(SharedPtr<CodeSet> module_, VAddr base_addr) {
 }
 
 ResultVal<VAddr> Process::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
-    if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END ||
-        target + size < target) {
+    if (target < vm_manager.GetHeapRegionBaseAddress() ||
+        target + size > vm_manager.GetHeapRegionEndAddress() || target + size < target) {
         return ERR_INVALID_ADDRESS;
     }
 
@@ -181,8 +277,8 @@ ResultVal<VAddr> Process::HeapAllocate(VAddr target, u64 size, VMAPermission per
 }
 
 ResultCode Process::HeapFree(VAddr target, u32 size) {
-    if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END ||
-        target + size < target) {
+    if (target < vm_manager.GetHeapRegionBaseAddress() ||
+        target + size > vm_manager.GetHeapRegionEndAddress() || target + size < target) {
         return ERR_INVALID_ADDRESS;
     }
 
@@ -211,7 +307,7 @@ ResultCode Process::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
                "Shared memory exceeds bounds of mapped block");
 
     const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block;
-    size_t backing_block_offset = vma->second.offset + vma_offset;
+    std::size_t backing_block_offset = vma->second.offset + vma_offset;
 
     CASCADE_RESULT(auto new_vma,
                    vm_manager.MapMemoryBlock(dst_addr, backing_block, backing_block_offset, size,
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 1587d40c1..590e0c73d 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -17,6 +17,10 @@
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/vm_manager.h"
 
+namespace FileSys {
+class ProgramMetadata;
+}
+
 namespace Kernel {
 
 class KernelCore;
@@ -59,7 +63,7 @@ class ResourceLimit;
 
 struct CodeSet final : public Object {
     struct Segment {
-        size_t offset = 0;
+        std::size_t offset = 0;
         VAddr addr = 0;
         u32 size = 0;
     };
@@ -131,6 +135,121 @@ public:
         return HANDLE_TYPE;
     }
 
+    /// Gets a reference to the process' memory manager.
+    Kernel::VMManager& VMManager() {
+        return vm_manager;
+    }
+
+    /// Gets a const reference to the process' memory manager.
+    const Kernel::VMManager& VMManager() const {
+        return vm_manager;
+    }
+
+    /// Gets the current status of the process
+    ProcessStatus GetStatus() const {
+        return status;
+    }
+
+    /// Gets the unique ID that identifies this particular process.
+    u32 GetProcessID() const {
+        return process_id;
+    }
+
+    /// Gets the title ID corresponding to this process.
+    u64 GetTitleID() const {
+        return program_id;
+    }
+
+    /// Gets the resource limit descriptor for this process
+    ResourceLimit& GetResourceLimit() {
+        return *resource_limit;
+    }
+
+    /// Gets the resource limit descriptor for this process
+    const ResourceLimit& GetResourceLimit() const {
+        return *resource_limit;
+    }
+
+    /// Gets the default CPU ID for this process
+    u8 GetDefaultProcessorID() const {
+        return ideal_processor;
+    }
+
+    /// Gets the bitmask of allowed CPUs that this process' threads can run on.
+    u32 GetAllowedProcessorMask() const {
+        return allowed_processor_mask;
+    }
+
+    /// Gets the bitmask of allowed thread priorities.
+    u32 GetAllowedThreadPriorityMask() const {
+        return allowed_thread_priority_mask;
+    }
+
+    u32 IsVirtualMemoryEnabled() const {
+        return is_virtual_address_memory_enabled;
+    }
+
+    /// Whether this process is an AArch64 or AArch32 process.
+    bool Is64BitProcess() const {
+        return is_64bit_process;
+    }
+
+    /**
+     * Loads process-specific configuration info with metadata provided
+     * by an executable.
+     *
+     * @param metadata The provided metadata to load process-specific info.
+     */
+    void LoadFromMetadata(const FileSys::ProgramMetadata& metadata);
+
+    /**
+     * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
+     * to this process.
+     */
+    void ParseKernelCaps(const u32* kernel_caps, std::size_t len);
+
+    /**
+     * Applies address space changes and launches the process main thread.
+     */
+    void Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size);
+
+    /**
+     * Prepares a process for termination by marking it as exited and stopping
+     * all of its threads other than the currently running one.
+     */
+    void PrepareForTermination();
+
+    void LoadModule(SharedPtr<CodeSet> module_, VAddr base_addr);
+
+    ///////////////////////////////////////////////////////////////////////////////////////////////
+    // Memory Management
+
+    // Marks the next available region as used and returns the address of the slot.
+    VAddr MarkNextAvailableTLSSlotAsUsed(Thread& thread);
+
+    // Frees a used TLS slot identified by the given address
+    void FreeTLSSlot(VAddr tls_address);
+
+    ResultVal<VAddr> HeapAllocate(VAddr target, u64 size, VMAPermission perms);
+    ResultCode HeapFree(VAddr target, u32 size);
+
+    ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size);
+
+    ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size);
+
+private:
+    explicit Process(KernelCore& kernel);
+    ~Process() override;
+
+    /// Memory manager for this process.
+    Kernel::VMManager vm_manager;
+
+    /// Current status of the process
+    ProcessStatus status;
+
+    /// The ID of this process
+    u32 process_id = 0;
+
     /// Title ID corresponding to the process
     u64 program_id;
 
@@ -140,7 +259,7 @@ public:
     /// The process may only call SVCs which have the corresponding bit set.
     std::bitset<0x80> svc_access_mask;
     /// Maximum size of the handle table for the process.
-    unsigned int handle_table_size = 0x200;
+    u32 handle_table_size = 0x200;
     /// Special memory ranges mapped into this processes address space. This is used to give
     /// processes access to specific I/O regions and device memory.
     boost::container::static_vector<AddressMapping, 8> address_mappings;
@@ -154,29 +273,6 @@ public:
     u32 allowed_processor_mask = THREADPROCESSORID_DEFAULT_MASK;
     u32 allowed_thread_priority_mask = 0xFFFFFFFF;
     u32 is_virtual_address_memory_enabled = 0;
-    /// Current status of the process
-    ProcessStatus status;
-
-    /// The ID of this process
-    u32 process_id = 0;
-
-    /**
-     * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
-     * to this process.
-     */
-    void ParseKernelCaps(const u32* kernel_caps, size_t len);
-
-    /**
-     * Applies address space changes and launches the process main thread.
-     */
-    void Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size);
-
-    void LoadModule(SharedPtr<CodeSet> module_, VAddr base_addr);
-
-    ///////////////////////////////////////////////////////////////////////////////////////////////
-    // Memory Management
-
-    VMManager vm_manager;
 
     // Memory used to back the allocations in the regular heap. A single vector is used to cover
     // the entire virtual address space extents that bound the allocations, including any holes.
@@ -196,18 +292,12 @@ public:
     /// This vector will grow as more pages are allocated for new threads.
     std::vector<std::bitset<8>> tls_slots;
 
-    std::string name;
-
-    ResultVal<VAddr> HeapAllocate(VAddr target, u64 size, VMAPermission perms);
-    ResultCode HeapFree(VAddr target, u32 size);
-
-    ResultCode MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size);
+    /// Whether or not this process is AArch64, or AArch32.
+    /// By default, we currently assume this is true, unless otherwise
+    /// specified by metadata provided to the process during loading.
+    bool is_64bit_process = true;
 
-    ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size);
-
-private:
-    explicit Process(KernelCore& kernel);
-    ~Process() override;
+    std::string name;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 69c812f16..1e82cfffb 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -17,7 +17,7 @@ namespace Kernel {
 
 std::mutex Scheduler::scheduler_mutex;
 
-Scheduler::Scheduler(Core::ARM_Interface* cpu_core) : cpu_core(cpu_core) {}
+Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {}
 
 Scheduler::~Scheduler() {
     for (auto& thread : thread_list) {
@@ -59,9 +59,9 @@ void Scheduler::SwitchContext(Thread* new_thread) {
     // Save context for previous thread
     if (previous_thread) {
         previous_thread->last_running_ticks = CoreTiming::GetTicks();
-        cpu_core->SaveContext(previous_thread->context);
+        cpu_core.SaveContext(previous_thread->context);
         // Save the TPIDR_EL0 system register in case it was modified.
-        previous_thread->tpidr_el0 = cpu_core->GetTPIDR_EL0();
+        previous_thread->tpidr_el0 = cpu_core.GetTPIDR_EL0();
 
         if (previous_thread->status == ThreadStatus::Running) {
             // This is only the case when a reschedule is triggered without the current thread
@@ -88,13 +88,13 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 
         if (previous_process != current_thread->owner_process) {
             Core::CurrentProcess() = current_thread->owner_process;
-            SetCurrentPageTable(&Core::CurrentProcess()->vm_manager.page_table);
+            SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
         }
 
-        cpu_core->LoadContext(new_thread->context);
-        cpu_core->SetTlsAddress(new_thread->GetTLSAddress());
-        cpu_core->SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
-        cpu_core->ClearExclusiveState();
+        cpu_core.LoadContext(new_thread->context);
+        cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
+        cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
+        cpu_core.ClearExclusiveState();
     } else {
         current_thread = nullptr;
         // Note: We do not reset the current process and current page table when idling because
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 744990c9b..2c94641ec 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -19,7 +19,7 @@ namespace Kernel {
 
 class Scheduler final {
 public:
-    explicit Scheduler(Core::ARM_Interface* cpu_core);
+    explicit Scheduler(Core::ARM_Interface& cpu_core);
     ~Scheduler();
 
     /// Returns whether there are any threads that are ready to run.
@@ -72,7 +72,7 @@ private:
 
     SharedPtr<Thread> current_thread = nullptr;
 
-    Core::ARM_Interface* cpu_core;
+    Core::ARM_Interface& cpu_core;
 
     static std::mutex scheduler_mutex;
 };
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index abb1d09cd..d061e6155 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -8,6 +8,7 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/shared_memory.h"
 #include "core/memory.h"
 
@@ -34,11 +35,11 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, SharedPtr<Proce
 
         // Refresh the address mappings for the current process.
         if (Core::CurrentProcess() != nullptr) {
-            Core::CurrentProcess()->vm_manager.RefreshMemoryBlockMappings(
+            Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
                 shared_memory->backing_block.get());
         }
     } else {
-        auto& vm_manager = shared_memory->owner_process->vm_manager;
+        auto& vm_manager = shared_memory->owner_process->VMManager();
 
         // The memory is already available and mapped in the owner process.
         auto vma = vm_manager.FindVMA(address);
@@ -71,7 +72,8 @@ SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
     shared_memory->other_permissions = other_permissions;
     shared_memory->backing_block = std::move(heap_block);
     shared_memory->backing_block_offset = offset;
-    shared_memory->base_address = Memory::HEAP_VADDR + offset;
+    shared_memory->base_address =
+        kernel.CurrentProcess()->VMManager().GetHeapRegionBaseAddress() + offset;
 
     return shared_memory;
 }
@@ -105,7 +107,7 @@ ResultCode SharedMemory::Map(Process* target_process, VAddr address, MemoryPermi
     VAddr target_address = address;
 
     // Map the memory block into the target process
-    auto result = target_process->vm_manager.MapMemoryBlock(
+    auto result = target_process->VMManager().MapMemoryBlock(
         target_address, backing_block, backing_block_offset, size, MemoryState::Shared);
     if (result.Failed()) {
         LOG_ERROR(
@@ -115,14 +117,14 @@ ResultCode SharedMemory::Map(Process* target_process, VAddr address, MemoryPermi
         return result.Code();
     }
 
-    return target_process->vm_manager.ReprotectRange(target_address, size,
-                                                     ConvertPermissions(permissions));
+    return target_process->VMManager().ReprotectRange(target_address, size,
+                                                      ConvertPermissions(permissions));
 }
 
 ResultCode SharedMemory::Unmap(Process* target_process, VAddr address) {
     // TODO(Subv): Verify what happens if the application tries to unmap an address that is not
     // mapped to a SharedMemory.
-    return target_process->vm_manager.UnmapRange(address, size);
+    return target_process->VMManager().UnmapRange(address, size);
 }
 
 VMAPermission SharedMemory::ConvertPermissions(MemoryPermission permission) {
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index 2c729afe3..2c06bb7ce 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -119,7 +119,7 @@ public:
     /// Backing memory for this shared memory block.
     std::shared_ptr<std::vector<u8>> backing_block;
     /// Offset into the backing block for this shared memory.
-    size_t backing_block_offset;
+    std::size_t backing_block_offset;
     /// Size of the memory block. Page-aligned.
     u64 size;
     /// Permission restrictions applied to the process which created the block.
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index f500fd2e7..1cdaa740a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -35,13 +35,25 @@
 #include "core/hle/service/service.h"
 
 namespace Kernel {
+namespace {
+constexpr bool Is4KBAligned(VAddr address) {
+    return (address & 0xFFF) == 0;
+}
+} // Anonymous namespace
 
 /// Set the process heap to a given Size. It can both extend and shrink the heap.
 static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
     LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
+
+    // Size must be a multiple of 0x200000 (2MB) and be less than 8GB (mask allows bits 21-32).
+    if ((heap_size & 0xFFFFFFFE001FFFFF) != 0) {
+        return ERR_INVALID_SIZE;
+    }
+
     auto& process = *Core::CurrentProcess();
+    const VAddr heap_base = process.VMManager().GetHeapRegionBaseAddress();
     CASCADE_RESULT(*heap_addr,
-                   process.HeapAllocate(Memory::HEAP_VADDR, heap_size, VMAPermission::ReadWrite));
+                   process.HeapAllocate(heap_base, heap_size, VMAPermission::ReadWrite));
     return RESULT_SUCCESS;
 }
 
@@ -56,6 +68,15 @@ static ResultCode SetMemoryAttribute(VAddr addr, u64 size, u32 state0, u32 state
 static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
+
+    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
     return Core::CurrentProcess()->MirrorMemory(dst_addr, src_addr, size);
 }
 
@@ -63,6 +84,15 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
 static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
+
+    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
     return Core::CurrentProcess()->UnmapMemory(dst_addr, src_addr, size);
 }
 
@@ -140,13 +170,13 @@ static ResultCode GetProcessId(u32* process_id, Handle process_handle) {
         return ERR_INVALID_HANDLE;
     }
 
-    *process_id = process->process_id;
+    *process_id = process->GetProcessID();
     return RESULT_SUCCESS;
 }
 
 /// Default thread wakeup callback for WaitSynchronization
 static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thread> thread,
-                                        SharedPtr<WaitObject> object, size_t index) {
+                                        SharedPtr<WaitObject> object, std::size_t index) {
     ASSERT(thread->status == ThreadStatus::WaitSynchAny);
 
     if (reason == ThreadWakeupReason::Timeout) {
@@ -251,6 +281,10 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
               "requesting_current_thread_handle=0x{:08X}",
               holding_thread_handle, mutex_addr, requesting_thread_handle);
 
+    if (Memory::IsKernelVirtualAddress(mutex_addr)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
     auto& handle_table = Core::System::GetInstance().Kernel().HandleTable();
     return Mutex::TryAcquire(handle_table, mutex_addr, holding_thread_handle,
                              requesting_thread_handle);
@@ -260,6 +294,10 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
 static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
     LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr);
 
+    if (Memory::IsKernelVirtualAddress(mutex_addr)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
     return Mutex::Release(mutex_addr);
 }
 
@@ -288,26 +326,27 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
     LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
               info_sub_id, handle);
 
-    const auto& vm_manager = Core::CurrentProcess()->vm_manager;
+    const auto& current_process = Core::CurrentProcess();
+    const auto& vm_manager = current_process->VMManager();
 
     switch (static_cast<GetInfoType>(info_id)) {
     case GetInfoType::AllowedCpuIdBitmask:
-        *result = Core::CurrentProcess()->allowed_processor_mask;
+        *result = current_process->GetAllowedProcessorMask();
         break;
     case GetInfoType::AllowedThreadPrioBitmask:
-        *result = Core::CurrentProcess()->allowed_thread_priority_mask;
+        *result = current_process->GetAllowedThreadPriorityMask();
         break;
     case GetInfoType::MapRegionBaseAddr:
-        *result = Memory::MAP_REGION_VADDR;
+        *result = vm_manager.GetMapRegionBaseAddress();
         break;
     case GetInfoType::MapRegionSize:
-        *result = Memory::MAP_REGION_SIZE;
+        *result = vm_manager.GetMapRegionSize();
         break;
     case GetInfoType::HeapRegionBaseAddr:
-        *result = Memory::HEAP_VADDR;
+        *result = vm_manager.GetHeapRegionBaseAddress();
         break;
     case GetInfoType::HeapRegionSize:
-        *result = Memory::HEAP_SIZE;
+        *result = vm_manager.GetHeapRegionSize();
         break;
     case GetInfoType::TotalMemoryUsage:
         *result = vm_manager.GetTotalMemoryUsage();
@@ -322,22 +361,35 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
         *result = 0;
         break;
     case GetInfoType::AddressSpaceBaseAddr:
-        *result = vm_manager.GetAddressSpaceBaseAddr();
+        *result = vm_manager.GetCodeRegionBaseAddress();
         break;
-    case GetInfoType::AddressSpaceSize:
-        *result = vm_manager.GetAddressSpaceSize();
+    case GetInfoType::AddressSpaceSize: {
+        const u64 width = vm_manager.GetAddressSpaceWidth();
+
+        switch (width) {
+        case 32:
+            *result = 0xFFE00000;
+            break;
+        case 36:
+            *result = 0xFF8000000;
+            break;
+        case 39:
+            *result = 0x7FF8000000;
+            break;
+        }
         break;
+    }
     case GetInfoType::NewMapRegionBaseAddr:
-        *result = Memory::NEW_MAP_REGION_VADDR;
+        *result = vm_manager.GetNewMapRegionBaseAddress();
         break;
     case GetInfoType::NewMapRegionSize:
-        *result = Memory::NEW_MAP_REGION_SIZE;
+        *result = vm_manager.GetNewMapRegionSize();
         break;
     case GetInfoType::IsVirtualAddressMemoryEnabled:
-        *result = Core::CurrentProcess()->is_virtual_address_memory_enabled;
+        *result = current_process->IsVirtualMemoryEnabled();
         break;
     case GetInfoType::TitleId:
-        *result = Core::CurrentProcess()->program_id;
+        *result = current_process->GetTitleID();
         break;
     case GetInfoType::PrivilegedProcessId:
         LOG_WARNING(Kernel_SVC,
@@ -363,8 +415,36 @@ static ResultCode SetThreadActivity(Handle handle, u32 unknown) {
 }
 
 /// Gets the thread context
-static ResultCode GetThreadContext(Handle handle, VAddr addr) {
-    LOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}, addr=0x{:X}", handle, addr);
+static ResultCode GetThreadContext(VAddr thread_context, Handle handle) {
+    LOG_DEBUG(Kernel_SVC, "called, context=0x{:08X}, thread=0x{:X}", thread_context, handle);
+
+    auto& kernel = Core::System::GetInstance().Kernel();
+    const SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(handle);
+    if (!thread) {
+        return ERR_INVALID_HANDLE;
+    }
+
+    const auto current_process = Core::CurrentProcess();
+    if (thread->owner_process != current_process) {
+        return ERR_INVALID_HANDLE;
+    }
+
+    if (thread == GetCurrentThread()) {
+        return ERR_ALREADY_REGISTERED;
+    }
+
+    Core::ARM_Interface::ThreadContext ctx = thread->context;
+    // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
+    ctx.pstate &= 0xFF0FFE20;
+
+    // If 64-bit, we can just write the context registers directly and we're good.
+    // However, if 32-bit, we have to ensure some registers are zeroed out.
+    if (!current_process->Is64BitProcess()) {
+        std::fill(ctx.cpu_registers.begin() + 15, ctx.cpu_registers.end(), 0);
+        std::fill(ctx.vector_registers.begin() + 16, ctx.vector_registers.end(), u128{});
+    }
+
+    Memory::WriteBlock(thread_context, &ctx, sizeof(ctx));
     return RESULT_SUCCESS;
 }
 
@@ -392,8 +472,8 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
 
     // Note: The kernel uses the current process's resource limit instead of
     // the one from the thread owner's resource limit.
-    SharedPtr<ResourceLimit>& resource_limit = Core::CurrentProcess()->resource_limit;
-    if (resource_limit->GetMaxResourceValue(ResourceType::Priority) > priority) {
+    const ResourceLimit& resource_limit = Core::CurrentProcess()->GetResourceLimit();
+    if (resource_limit.GetMaxResourceValue(ResourceType::Priority) > priority) {
         return ERR_NOT_AUTHORIZED;
     }
 
@@ -415,35 +495,43 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
               "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
               shared_memory_handle, addr, size, permissions);
 
+    if (!Is4KBAligned(addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
+    const auto permissions_type = static_cast<MemoryPermission>(permissions);
+    if (permissions_type != MemoryPermission::Read &&
+        permissions_type != MemoryPermission::ReadWrite) {
+        LOG_ERROR(Kernel_SVC, "Invalid permissions=0x{:08X}", permissions);
+        return ERR_INVALID_MEMORY_PERMISSIONS;
+    }
+
     auto& kernel = Core::System::GetInstance().Kernel();
     auto shared_memory = kernel.HandleTable().Get<SharedMemory>(shared_memory_handle);
     if (!shared_memory) {
         return ERR_INVALID_HANDLE;
     }
 
-    MemoryPermission permissions_type = static_cast<MemoryPermission>(permissions);
-    switch (permissions_type) {
-    case MemoryPermission::Read:
-    case MemoryPermission::Write:
-    case MemoryPermission::ReadWrite:
-    case MemoryPermission::Execute:
-    case MemoryPermission::ReadExecute:
-    case MemoryPermission::WriteExecute:
-    case MemoryPermission::ReadWriteExecute:
-    case MemoryPermission::DontCare:
-        return shared_memory->Map(Core::CurrentProcess().get(), addr, permissions_type,
-                                  MemoryPermission::DontCare);
-    default:
-        LOG_ERROR(Kernel_SVC, "unknown permissions=0x{:08X}", permissions);
-    }
-
-    return RESULT_SUCCESS;
+    return shared_memory->Map(Core::CurrentProcess().get(), addr, permissions_type,
+                              MemoryPermission::DontCare);
 }
 
 static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size) {
     LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}",
                 shared_memory_handle, addr, size);
 
+    if (!Is4KBAligned(addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
     auto& kernel = Core::System::GetInstance().Kernel();
     auto shared_memory = kernel.HandleTable().Get<SharedMemory>(shared_memory_handle);
 
@@ -459,9 +547,9 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* /*page_i
     if (!process) {
         return ERR_INVALID_HANDLE;
     }
-    auto vma = process->vm_manager.FindVMA(addr);
+    auto vma = process->VMManager().FindVMA(addr);
     memory_info->attributes = 0;
-    if (vma == Core::CurrentProcess()->vm_manager.vma_map.end()) {
+    if (vma == Core::CurrentProcess()->VMManager().vma_map.end()) {
         memory_info->base_address = 0;
         memory_info->permission = static_cast<u32>(VMAPermission::None);
         memory_info->size = 0;
@@ -485,35 +573,13 @@ static ResultCode QueryMemory(MemoryInfo* memory_info, PageInfo* page_info, VAdd
 
 /// Exits the current process
 static void ExitProcess() {
-    LOG_INFO(Kernel_SVC, "Process {} exiting", Core::CurrentProcess()->process_id);
+    auto& current_process = Core::CurrentProcess();
 
-    ASSERT_MSG(Core::CurrentProcess()->status == ProcessStatus::Running,
+    LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
+    ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
                "Process has already exited");
 
-    Core::CurrentProcess()->status = ProcessStatus::Exited;
-
-    auto stop_threads = [](const std::vector<SharedPtr<Thread>>& thread_list) {
-        for (auto& thread : thread_list) {
-            if (thread->owner_process != Core::CurrentProcess())
-                continue;
-
-            if (thread == GetCurrentThread())
-                continue;
-
-            // TODO(Subv): When are the other running/ready threads terminated?
-            ASSERT_MSG(thread->status == ThreadStatus::WaitSynchAny ||
-                           thread->status == ThreadStatus::WaitSynchAll,
-                       "Exiting processes with non-waiting threads is currently unimplemented");
-
-            thread->Stop();
-        }
-    };
-
-    auto& system = Core::System::GetInstance();
-    stop_threads(system.Scheduler(0)->GetThreadList());
-    stop_threads(system.Scheduler(1)->GetThreadList());
-    stop_threads(system.Scheduler(2)->GetThreadList());
-    stop_threads(system.Scheduler(3)->GetThreadList());
+    current_process->PrepareForTermination();
 
     // Kill the current thread
     GetCurrentThread()->Stop();
@@ -524,20 +590,20 @@ static void ExitProcess() {
 /// Creates a new thread
 static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, VAddr stack_top,
                                u32 priority, s32 processor_id) {
-    std::string name = fmt::format("unknown-{:X}", entry_point);
+    std::string name = fmt::format("thread-{:X}", entry_point);
 
     if (priority > THREADPRIO_LOWEST) {
         return ERR_INVALID_THREAD_PRIORITY;
     }
 
-    SharedPtr<ResourceLimit>& resource_limit = Core::CurrentProcess()->resource_limit;
-    if (resource_limit->GetMaxResourceValue(ResourceType::Priority) > priority) {
+    const ResourceLimit& resource_limit = Core::CurrentProcess()->GetResourceLimit();
+    if (resource_limit.GetMaxResourceValue(ResourceType::Priority) > priority) {
         return ERR_NOT_AUTHORIZED;
     }
 
     if (processor_id == THREADPROCESSORID_DEFAULT) {
         // Set the target CPU to the one specified in the process' exheader.
-        processor_id = Core::CurrentProcess()->ideal_processor;
+        processor_id = Core::CurrentProcess()->GetDefaultProcessorID();
         ASSERT(processor_id != THREADPROCESSORID_DEFAULT);
     }
 
@@ -647,16 +713,17 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
     LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
               condition_variable_addr, target);
 
-    auto RetrieveWaitingThreads =
-        [](size_t core_index, std::vector<SharedPtr<Thread>>& waiting_threads, VAddr condvar_addr) {
-            const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
-            auto& thread_list = scheduler->GetThreadList();
+    auto RetrieveWaitingThreads = [](std::size_t core_index,
+                                     std::vector<SharedPtr<Thread>>& waiting_threads,
+                                     VAddr condvar_addr) {
+        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+        auto& thread_list = scheduler->GetThreadList();
 
-            for (auto& thread : thread_list) {
-                if (thread->condvar_wait_address == condvar_addr)
-                    waiting_threads.push_back(thread);
-            }
-        };
+        for (auto& thread : thread_list) {
+            if (thread->condvar_wait_address == condvar_addr)
+                waiting_threads.push_back(thread);
+        }
+    };
 
     // Retrieve a list of all threads that are waiting for this condition variable.
     std::vector<SharedPtr<Thread>> waiting_threads;
@@ -672,7 +739,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
     // Only process up to 'target' threads, unless 'target' is -1, in which case process
     // them all.
-    size_t last = waiting_threads.size();
+    std::size_t last = waiting_threads.size();
     if (target != -1)
         last = target;
 
@@ -680,12 +747,12 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
     if (last > waiting_threads.size())
         return RESULT_SUCCESS;
 
-    for (size_t index = 0; index < last; ++index) {
+    for (std::size_t index = 0; index < last; ++index) {
         auto& thread = waiting_threads[index];
 
         ASSERT(thread->condvar_wait_address == condition_variable_addr);
 
-        size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
+        std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
 
         auto& monitor = Core::System::GetInstance().Monitor();
 
@@ -863,10 +930,10 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
     }
 
     if (core == static_cast<u32>(THREADPROCESSORID_DEFAULT)) {
-        ASSERT(thread->owner_process->ideal_processor !=
+        ASSERT(thread->owner_process->GetDefaultProcessorID() !=
                static_cast<u8>(THREADPROCESSORID_DEFAULT));
         // Set the target CPU to the one specified in the process' exheader.
-        core = thread->owner_process->ideal_processor;
+        core = thread->owner_process->GetDefaultProcessorID();
         mask = 1ull << core;
     }
 
@@ -898,12 +965,28 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
     LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size,
               local_permissions, remote_permissions);
 
+    // Size must be a multiple of 4KB and be less than or equal to
+    // approx. 8 GB (actually (1GB - 512B) * 8)
+    if (size == 0 || (size & 0xFFFFFFFE00000FFF) != 0) {
+        return ERR_INVALID_SIZE;
+    }
+
+    const auto local_perms = static_cast<MemoryPermission>(local_permissions);
+    if (local_perms != MemoryPermission::Read && local_perms != MemoryPermission::ReadWrite) {
+        return ERR_INVALID_MEMORY_PERMISSIONS;
+    }
+
+    const auto remote_perms = static_cast<MemoryPermission>(remote_permissions);
+    if (remote_perms != MemoryPermission::Read && remote_perms != MemoryPermission::ReadWrite &&
+        remote_perms != MemoryPermission::DontCare) {
+        return ERR_INVALID_MEMORY_PERMISSIONS;
+    }
+
     auto& kernel = Core::System::GetInstance().Kernel();
     auto& handle_table = kernel.HandleTable();
     auto shared_mem_handle =
         SharedMemory::Create(kernel, handle_table.Get<Process>(KernelHandle::CurrentProcess), size,
-                             static_cast<MemoryPermission>(local_permissions),
-                             static_cast<MemoryPermission>(remote_permissions));
+                             local_perms, remote_perms);
 
     CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
     return RESULT_SUCCESS;
@@ -977,7 +1060,7 @@ static const FunctionDef SVC_Table[] = {
     {0x2B, nullptr, "FlushDataCache"},
     {0x2C, nullptr, "MapPhysicalMemory"},
     {0x2D, nullptr, "UnmapPhysicalMemory"},
-    {0x2E, nullptr, "GetNextThreadInfo"},
+    {0x2E, nullptr, "GetFutureThreadInfo"},
     {0x2F, nullptr, "GetLastThreadInfo"},
     {0x30, nullptr, "GetResourceLimitLimitValue"},
     {0x31, nullptr, "GetResourceLimitCurrentValue"},
@@ -1003,11 +1086,11 @@ static const FunctionDef SVC_Table[] = {
     {0x45, nullptr, "CreateEvent"},
     {0x46, nullptr, "Unknown"},
     {0x47, nullptr, "Unknown"},
-    {0x48, nullptr, "AllocateUnsafeMemory"},
-    {0x49, nullptr, "FreeUnsafeMemory"},
-    {0x4A, nullptr, "SetUnsafeAllocationLimit"},
-    {0x4B, nullptr, "CreateJitMemory"},
-    {0x4C, nullptr, "MapJitMemory"},
+    {0x48, nullptr, "MapPhysicalMemoryUnsafe"},
+    {0x49, nullptr, "UnmapPhysicalMemoryUnsafe"},
+    {0x4A, nullptr, "SetUnsafeLimit"},
+    {0x4B, nullptr, "CreateCodeMemory"},
+    {0x4C, nullptr, "ControlCodeMemory"},
     {0x4D, nullptr, "SleepSystem"},
     {0x4E, nullptr, "ReadWriteRegister"},
     {0x4F, nullptr, "SetProcessActivity"},
@@ -1042,7 +1125,7 @@ static const FunctionDef SVC_Table[] = {
     {0x6C, nullptr, "SetHardwareBreakPoint"},
     {0x6D, nullptr, "GetDebugThreadParam"},
     {0x6E, nullptr, "Unknown"},
-    {0x6F, nullptr, "GetMemoryInfo"},
+    {0x6F, nullptr, "GetSystemInfo"},
     {0x70, nullptr, "CreatePort"},
     {0x71, nullptr, "ManageNamedPort"},
     {0x72, nullptr, "ConnectToPort"},
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 1eda5f879..22712e64f 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -13,7 +13,9 @@
 
 namespace Kernel {
 
-#define PARAM(n) Core::CurrentArmInterface().GetReg(n)
+static inline u64 Param(int n) {
+    return Core::CurrentArmInterface().GetReg(n);
+}
 
 /**
  * HLE a function return from the current ARM userland process
@@ -28,23 +30,23 @@ static inline void FuncReturn(u64 res) {
 
 template <ResultCode func(u64)>
 void SvcWrap() {
-    FuncReturn(func(PARAM(0)).raw);
+    FuncReturn(func(Param(0)).raw);
 }
 
 template <ResultCode func(u32)>
 void SvcWrap() {
-    FuncReturn(func((u32)PARAM(0)).raw);
+    FuncReturn(func((u32)Param(0)).raw);
 }
 
 template <ResultCode func(u32, u32)>
 void SvcWrap() {
-    FuncReturn(func((u32)PARAM(0), (u32)PARAM(1)).raw);
+    FuncReturn(func((u32)Param(0), (u32)Param(1)).raw);
 }
 
 template <ResultCode func(u32*, u32)>
 void SvcWrap() {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, (u32)PARAM(1)).raw;
+    u32 retval = func(&param_1, (u32)Param(1)).raw;
     Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
@@ -52,39 +54,44 @@ void SvcWrap() {
 template <ResultCode func(u32*, u64)>
 void SvcWrap() {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, PARAM(1)).raw;
+    u32 retval = func(&param_1, Param(1)).raw;
     Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
 template <ResultCode func(u64, s32)>
 void SvcWrap() {
-    FuncReturn(func(PARAM(0), (s32)PARAM(1)).raw);
+    FuncReturn(func(Param(0), (s32)Param(1)).raw);
+}
+
+template <ResultCode func(u64, u32)>
+void SvcWrap() {
+    FuncReturn(func(Param(0), static_cast<u32>(Param(1))).raw);
 }
 
 template <ResultCode func(u64*, u64)>
 void SvcWrap() {
     u64 param_1 = 0;
-    u32 retval = func(&param_1, PARAM(1)).raw;
+    u32 retval = func(&param_1, Param(1)).raw;
     Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
 template <ResultCode func(u32, u64)>
 void SvcWrap() {
-    FuncReturn(func((u32)(PARAM(0) & 0xFFFFFFFF), PARAM(1)).raw);
+    FuncReturn(func((u32)(Param(0) & 0xFFFFFFFF), Param(1)).raw);
 }
 
 template <ResultCode func(u32, u32, u64)>
 void SvcWrap() {
-    FuncReturn(func((u32)(PARAM(0) & 0xFFFFFFFF), (u32)(PARAM(1) & 0xFFFFFFFF), PARAM(2)).raw);
+    FuncReturn(func((u32)(Param(0) & 0xFFFFFFFF), (u32)(Param(1) & 0xFFFFFFFF), Param(2)).raw);
 }
 
 template <ResultCode func(u32, u32*, u64*)>
 void SvcWrap() {
     u32 param_1 = 0;
     u64 param_2 = 0;
-    ResultCode retval = func((u32)(PARAM(2) & 0xFFFFFFFF), &param_1, &param_2);
+    ResultCode retval = func((u32)(Param(2) & 0xFFFFFFFF), &param_1, &param_2);
     Core::CurrentArmInterface().SetReg(1, param_1);
     Core::CurrentArmInterface().SetReg(2, param_2);
     FuncReturn(retval.raw);
@@ -93,46 +100,46 @@ void SvcWrap() {
 template <ResultCode func(u64, u64, u32, u32)>
 void SvcWrap() {
     FuncReturn(
-        func(PARAM(0), PARAM(1), (u32)(PARAM(3) & 0xFFFFFFFF), (u32)(PARAM(3) & 0xFFFFFFFF)).raw);
+        func(Param(0), Param(1), (u32)(Param(2) & 0xFFFFFFFF), (u32)(Param(3) & 0xFFFFFFFF)).raw);
 }
 
 template <ResultCode func(u32, u64, u32)>
 void SvcWrap() {
-    FuncReturn(func((u32)PARAM(0), PARAM(1), (u32)PARAM(2)).raw);
+    FuncReturn(func((u32)Param(0), Param(1), (u32)Param(2)).raw);
 }
 
 template <ResultCode func(u64, u64, u64)>
 void SvcWrap() {
-    FuncReturn(func(PARAM(0), PARAM(1), PARAM(2)).raw);
+    FuncReturn(func(Param(0), Param(1), Param(2)).raw);
 }
 
 template <ResultCode func(u32, u64, u64, u32)>
 void SvcWrap() {
-    FuncReturn(func((u32)PARAM(0), PARAM(1), PARAM(2), (u32)PARAM(3)).raw);
+    FuncReturn(func((u32)Param(0), Param(1), Param(2), (u32)Param(3)).raw);
 }
 
 template <ResultCode func(u32, u64, u64)>
 void SvcWrap() {
-    FuncReturn(func((u32)PARAM(0), PARAM(1), PARAM(2)).raw);
+    FuncReturn(func((u32)Param(0), Param(1), Param(2)).raw);
 }
 
 template <ResultCode func(u32*, u64, u64, s64)>
 void SvcWrap() {
     u32 param_1 = 0;
-    ResultCode retval = func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3));
+    ResultCode retval = func(&param_1, Param(1), (u32)(Param(2) & 0xFFFFFFFF), (s64)Param(3));
     Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval.raw);
 }
 
 template <ResultCode func(u64, u64, u32, s64)>
 void SvcWrap() {
-    FuncReturn(func(PARAM(0), PARAM(1), (u32)PARAM(2), (s64)PARAM(3)).raw);
+    FuncReturn(func(Param(0), Param(1), (u32)Param(2), (s64)Param(3)).raw);
 }
 
 template <ResultCode func(u64*, u64, u64, u64)>
 void SvcWrap() {
     u64 param_1 = 0;
-    u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3)).raw;
+    u32 retval = func(&param_1, Param(1), Param(2), Param(3)).raw;
     Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
@@ -141,7 +148,7 @@ template <ResultCode func(u32*, u64, u64, u64, u32, s32)>
 void SvcWrap() {
     u32 param_1 = 0;
     u32 retval =
-        func(&param_1, PARAM(1), PARAM(2), PARAM(3), (u32)PARAM(4), (s32)(PARAM(5) & 0xFFFFFFFF))
+        func(&param_1, Param(1), Param(2), Param(3), (u32)Param(4), (s32)(Param(5) & 0xFFFFFFFF))
             .raw;
     Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
@@ -151,13 +158,13 @@ template <ResultCode func(MemoryInfo*, PageInfo*, u64)>
 void SvcWrap() {
     MemoryInfo memory_info = {};
     PageInfo page_info = {};
-    u32 retval = func(&memory_info, &page_info, PARAM(2)).raw;
+    u32 retval = func(&memory_info, &page_info, Param(2)).raw;
 
-    Memory::Write64(PARAM(0), memory_info.base_address);
-    Memory::Write64(PARAM(0) + 8, memory_info.size);
-    Memory::Write32(PARAM(0) + 16, memory_info.type);
-    Memory::Write32(PARAM(0) + 20, memory_info.attributes);
-    Memory::Write32(PARAM(0) + 24, memory_info.permission);
+    Memory::Write64(Param(0), memory_info.base_address);
+    Memory::Write64(Param(0) + 8, memory_info.size);
+    Memory::Write32(Param(0) + 16, memory_info.type);
+    Memory::Write32(Param(0) + 20, memory_info.attributes);
+    Memory::Write32(Param(0) + 24, memory_info.permission);
 
     FuncReturn(retval);
 }
@@ -165,7 +172,7 @@ void SvcWrap() {
 template <ResultCode func(u32*, u64, u64, u32)>
 void SvcWrap() {
     u32 param_1 = 0;
-    u32 retval = func(&param_1, PARAM(1), PARAM(2), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
+    u32 retval = func(&param_1, Param(1), Param(2), (u32)(Param(3) & 0xFFFFFFFF)).raw;
     Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
@@ -174,7 +181,7 @@ template <ResultCode func(Handle*, u64, u32, u32)>
 void SvcWrap() {
     u32 param_1 = 0;
     u32 retval =
-        func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
+        func(&param_1, Param(1), (u32)(Param(2) & 0xFFFFFFFF), (u32)(Param(3) & 0xFFFFFFFF)).raw;
     Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
@@ -182,14 +189,14 @@ void SvcWrap() {
 template <ResultCode func(u64, u32, s32, s64)>
 void SvcWrap() {
     FuncReturn(
-        func(PARAM(0), (u32)(PARAM(1) & 0xFFFFFFFF), (s32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3))
+        func(Param(0), (u32)(Param(1) & 0xFFFFFFFF), (s32)(Param(2) & 0xFFFFFFFF), (s64)Param(3))
             .raw);
 }
 
 template <ResultCode func(u64, u32, s32, s32)>
 void SvcWrap() {
-    FuncReturn(func(PARAM(0), (u32)(PARAM(1) & 0xFFFFFFFF), (s32)(PARAM(2) & 0xFFFFFFFF),
-                    (s32)(PARAM(3) & 0xFFFFFFFF))
+    FuncReturn(func(Param(0), (u32)(Param(1) & 0xFFFFFFFF), (s32)(Param(2) & 0xFFFFFFFF),
+                    (s32)(Param(3) & 0xFFFFFFFF))
                    .raw);
 }
 
@@ -219,20 +226,17 @@ void SvcWrap() {
 
 template <void func(s64)>
 void SvcWrap() {
-    func((s64)PARAM(0));
+    func((s64)Param(0));
 }
 
 template <void func(u64, u64 len)>
 void SvcWrap() {
-    func(PARAM(0), PARAM(1));
+    func(Param(0), Param(1));
 }
 
 template <void func(u64, u64, u64)>
 void SvcWrap() {
-    func(PARAM(0), PARAM(1), PARAM(2));
+    func(Param(0), Param(1), Param(2));
 }
 
-#undef PARAM
-#undef FuncReturn
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 3f12a84dc..b5c16cfbb 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -65,10 +65,7 @@ void Thread::Stop() {
     wait_objects.clear();
 
     // Mark the TLS slot in the thread's page as free.
-    const u64 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE;
-    const u64 tls_slot =
-        ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
-    Core::CurrentProcess()->tls_slots[tls_page].reset(tls_slot);
+    owner_process->FreeTLSSlot(tls_address);
 }
 
 void WaitCurrentThread_Sleep() {
@@ -178,32 +175,6 @@ void Thread::ResumeFromWait() {
 }
 
 /**
- * Finds a free location for the TLS section of a thread.
- * @param tls_slots The TLS page array of the thread's owner process.
- * Returns a tuple of (page, slot, alloc_needed) where:
- * page: The index of the first allocated TLS page that has free slots.
- * slot: The index of the first free slot in the indicated page.
- * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full).
- */
-static std::tuple<std::size_t, std::size_t, bool> GetFreeThreadLocalSlot(
-    const std::vector<std::bitset<8>>& tls_slots) {
-    // Iterate over all the allocated pages, and try to find one where not all slots are used.
-    for (std::size_t page = 0; page < tls_slots.size(); ++page) {
-        const auto& page_tls_slots = tls_slots[page];
-        if (!page_tls_slots.all()) {
-            // We found a page with at least one free slot, find which slot it is
-            for (std::size_t slot = 0; slot < page_tls_slots.size(); ++slot) {
-                if (!page_tls_slots.test(slot)) {
-                    return std::make_tuple(page, slot, false);
-                }
-            }
-        }
-    }
-
-    return std::make_tuple(0, 0, true);
-}
-
-/**
  * Resets a thread context, making it ready to be scheduled and run by the CPU
  * @param context Thread context to reset
  * @param stack_top Address of the top of the stack
@@ -217,8 +188,8 @@ static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAdd
     context.cpu_registers[0] = arg;
     context.pc = entry_point;
     context.sp = stack_top;
-    context.cpsr = 0;
-    context.fpscr = 0;
+    context.pstate = 0;
+    context.fpcr = 0;
 }
 
 ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point,
@@ -264,32 +235,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     thread->owner_process = owner_process;
     thread->scheduler = Core::System::GetInstance().Scheduler(processor_id);
     thread->scheduler->AddThread(thread, priority);
-
-    // Find the next available TLS index, and mark it as used
-    auto& tls_slots = owner_process->tls_slots;
-
-    auto [available_page, available_slot, needs_allocation] = GetFreeThreadLocalSlot(tls_slots);
-    if (needs_allocation) {
-        tls_slots.emplace_back(0); // The page is completely available at the start
-        available_page = tls_slots.size() - 1;
-        available_slot = 0; // Use the first slot in the new page
-
-        // Allocate some memory from the end of the linear heap for this region.
-        const size_t offset = thread->tls_memory->size();
-        thread->tls_memory->insert(thread->tls_memory->end(), Memory::PAGE_SIZE, 0);
-
-        auto& vm_manager = owner_process->vm_manager;
-        vm_manager.RefreshMemoryBlockMappings(thread->tls_memory.get());
-
-        vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE,
-                                  thread->tls_memory, 0, Memory::PAGE_SIZE,
-                                  MemoryState::ThreadLocal);
-    }
-
-    // Mark the slot as used
-    tls_slots[available_page].set(available_slot);
-    thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE +
-                          available_slot * Memory::TLS_ENTRY_SIZE;
+    thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
 
     // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
     // to initialize the context
@@ -311,13 +257,14 @@ void Thread::BoostPriority(u32 priority) {
 }
 
 SharedPtr<Thread> SetupMainThread(KernelCore& kernel, VAddr entry_point, u32 priority,
-                                  SharedPtr<Process> owner_process) {
+                                  Process& owner_process) {
     // Setup page table so we can write to memory
-    SetCurrentPageTable(&Core::CurrentProcess()->vm_manager.page_table);
+    SetCurrentPageTable(&owner_process.VMManager().page_table);
 
     // Initialize new "main" thread
+    const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
     auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, THREADPROCESSORID_0,
-                                     Memory::STACK_AREA_VADDR_END, std::move(owner_process));
+                                     stack_top, &owner_process);
 
     SharedPtr<Thread> thread = std::move(thread_res).Unwrap();
 
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index cb57ee78a..4250144c3 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -62,6 +62,9 @@ enum class ThreadWakeupReason {
 
 class Thread final : public WaitObject {
 public:
+    using TLSMemory = std::vector<u8>;
+    using TLSMemoryPtr = std::shared_ptr<TLSMemory>;
+
     /**
      * Creates and returns a new thread. The new thread is immediately scheduled
      * @param kernel The kernel instance this thread will be created under.
@@ -134,6 +137,14 @@ public:
         return thread_id;
     }
 
+    TLSMemoryPtr& GetTLSMemory() {
+        return tls_memory;
+    }
+
+    const TLSMemoryPtr& GetTLSMemory() const {
+        return tls_memory;
+    }
+
     /**
      * Resumes a thread from waiting
      */
@@ -254,7 +265,7 @@ public:
     Handle callback_handle;
 
     using WakeupCallback = bool(ThreadWakeupReason reason, SharedPtr<Thread> thread,
-                                SharedPtr<WaitObject> object, size_t index);
+                                SharedPtr<WaitObject> object, std::size_t index);
     // Callback that will be invoked when the thread is resumed from a waiting state. If the thread
     // was waiting via WaitSynchronizationN then the object will be the last object that became
     // available. In case of a timeout, the object will be nullptr.
@@ -269,7 +280,7 @@ private:
     explicit Thread(KernelCore& kernel);
     ~Thread() override;
 
-    std::shared_ptr<std::vector<u8>> tls_memory = std::make_shared<std::vector<u8>>();
+    TLSMemoryPtr tls_memory = std::make_shared<TLSMemory>();
 };
 
 /**
@@ -281,7 +292,7 @@ private:
  * @return A shared pointer to the main thread
  */
 SharedPtr<Thread> SetupMainThread(KernelCore& kernel, VAddr entry_point, u32 priority,
-                                  SharedPtr<Process> owner_process);
+                                  Process& owner_process);
 
 /**
  * Gets the current thread
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 479cacb62..e412309fd 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -9,6 +9,7 @@
 #include "common/logging/log.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
@@ -54,30 +55,32 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
 }
 
 VMManager::VMManager() {
-    Reset();
+    // Default to assuming a 39-bit address space. This way we have a sane
+    // starting point with executables that don't provide metadata.
+    Reset(FileSys::ProgramAddressSpaceType::Is39Bit);
 }
 
 VMManager::~VMManager() {
-    Reset();
+    Reset(FileSys::ProgramAddressSpaceType::Is39Bit);
 }
 
-void VMManager::Reset() {
-    vma_map.clear();
+void VMManager::Reset(FileSys::ProgramAddressSpaceType type) {
+    Clear();
+
+    InitializeMemoryRegionRanges(type);
+
+    page_table.Resize(address_space_width);
 
     // Initialize the map with a single free region covering the entire managed space.
     VirtualMemoryArea initial_vma;
-    initial_vma.size = MAX_ADDRESS;
+    initial_vma.size = address_space_end;
     vma_map.emplace(initial_vma.base, initial_vma);
 
-    page_table.pointers.fill(nullptr);
-    page_table.special_regions.clear();
-    page_table.attributes.fill(Memory::PageType::Unmapped);
-
     UpdatePageTableForVMA(initial_vma);
 }
 
 VMManager::VMAHandle VMManager::FindVMA(VAddr target) const {
-    if (target >= MAX_ADDRESS) {
+    if (target >= address_space_end) {
         return vma_map.end();
     } else {
         return std::prev(vma_map.upper_bound(target));
@@ -86,7 +89,7 @@ VMManager::VMAHandle VMManager::FindVMA(VAddr target) const {
 
 ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
                                                           std::shared_ptr<std::vector<u8>> block,
-                                                          size_t offset, u64 size,
+                                                          std::size_t offset, u64 size,
                                                           MemoryState state) {
     ASSERT(block != nullptr);
     ASSERT(offset + size <= block->size());
@@ -291,7 +294,7 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMARange(VAddr target, u64 size) {
 
     const VAddr target_end = target + size;
     ASSERT(target_end >= target);
-    ASSERT(target_end <= MAX_ADDRESS);
+    ASSERT(target_end <= address_space_end);
     ASSERT(size > 0);
 
     VMAIter begin_vma = StripIterConstness(FindVMA(target));
@@ -382,6 +385,85 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
     }
 }
 
+void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type) {
+    u64 map_region_size = 0;
+    u64 heap_region_size = 0;
+    u64 new_map_region_size = 0;
+    u64 tls_io_region_size = 0;
+
+    switch (type) {
+    case FileSys::ProgramAddressSpaceType::Is32Bit:
+        address_space_width = 32;
+        code_region_base = 0x200000;
+        code_region_end = code_region_base + 0x3FE00000;
+        map_region_size = 0x40000000;
+        heap_region_size = 0x40000000;
+        break;
+    case FileSys::ProgramAddressSpaceType::Is36Bit:
+        address_space_width = 36;
+        code_region_base = 0x8000000;
+        code_region_end = code_region_base + 0x78000000;
+        map_region_size = 0x180000000;
+        heap_region_size = 0x180000000;
+        break;
+    case FileSys::ProgramAddressSpaceType::Is32BitNoMap:
+        address_space_width = 32;
+        code_region_base = 0x200000;
+        code_region_end = code_region_base + 0x3FE00000;
+        map_region_size = 0;
+        heap_region_size = 0x80000000;
+        break;
+    case FileSys::ProgramAddressSpaceType::Is39Bit:
+        address_space_width = 39;
+        code_region_base = 0x8000000;
+        code_region_end = code_region_base + 0x80000000;
+        map_region_size = 0x1000000000;
+        heap_region_size = 0x180000000;
+        new_map_region_size = 0x80000000;
+        tls_io_region_size = 0x1000000000;
+        break;
+    default:
+        UNREACHABLE_MSG("Invalid address space type specified: {}", static_cast<u32>(type));
+        return;
+    }
+
+    address_space_base = 0;
+    address_space_end = 1ULL << address_space_width;
+
+    map_region_base = code_region_end;
+    map_region_end = map_region_base + map_region_size;
+
+    heap_region_base = map_region_end;
+    heap_region_end = heap_region_base + heap_region_size;
+
+    new_map_region_base = heap_region_end;
+    new_map_region_end = new_map_region_base + new_map_region_size;
+
+    tls_io_region_base = new_map_region_end;
+    tls_io_region_end = tls_io_region_base + tls_io_region_size;
+
+    if (new_map_region_size == 0) {
+        new_map_region_base = address_space_base;
+        new_map_region_end = address_space_end;
+    }
+}
+
+void VMManager::Clear() {
+    ClearVMAMap();
+    ClearPageTable();
+}
+
+void VMManager::ClearVMAMap() {
+    vma_map.clear();
+}
+
+void VMManager::ClearPageTable() {
+    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
+    page_table.special_regions.clear();
+    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
+              Memory::PageType::Unmapped);
+}
+
 u64 VMManager::GetTotalMemoryUsage() const {
     LOG_WARNING(Kernel, "(STUBBED) called");
     return 0xF8000000;
@@ -392,14 +474,80 @@ u64 VMManager::GetTotalHeapUsage() const {
     return 0x0;
 }
 
-VAddr VMManager::GetAddressSpaceBaseAddr() const {
-    LOG_WARNING(Kernel, "(STUBBED) called");
-    return 0x8000000;
+VAddr VMManager::GetAddressSpaceBaseAddress() const {
+    return address_space_base;
+}
+
+VAddr VMManager::GetAddressSpaceEndAddress() const {
+    return address_space_end;
 }
 
 u64 VMManager::GetAddressSpaceSize() const {
-    LOG_WARNING(Kernel, "(STUBBED) called");
-    return MAX_ADDRESS;
+    return address_space_end - address_space_base;
+}
+
+u64 VMManager::GetAddressSpaceWidth() const {
+    return address_space_width;
+}
+
+VAddr VMManager::GetCodeRegionBaseAddress() const {
+    return code_region_base;
+}
+
+VAddr VMManager::GetCodeRegionEndAddress() const {
+    return code_region_end;
+}
+
+u64 VMManager::GetCodeRegionSize() const {
+    return code_region_end - code_region_base;
+}
+
+VAddr VMManager::GetHeapRegionBaseAddress() const {
+    return heap_region_base;
+}
+
+VAddr VMManager::GetHeapRegionEndAddress() const {
+    return heap_region_end;
+}
+
+u64 VMManager::GetHeapRegionSize() const {
+    return heap_region_end - heap_region_base;
+}
+
+VAddr VMManager::GetMapRegionBaseAddress() const {
+    return map_region_base;
+}
+
+VAddr VMManager::GetMapRegionEndAddress() const {
+    return map_region_end;
+}
+
+u64 VMManager::GetMapRegionSize() const {
+    return map_region_end - map_region_base;
+}
+
+VAddr VMManager::GetNewMapRegionBaseAddress() const {
+    return new_map_region_base;
+}
+
+VAddr VMManager::GetNewMapRegionEndAddress() const {
+    return new_map_region_end;
+}
+
+u64 VMManager::GetNewMapRegionSize() const {
+    return new_map_region_end - new_map_region_base;
+}
+
+VAddr VMManager::GetTLSIORegionBaseAddress() const {
+    return tls_io_region_base;
+}
+
+VAddr VMManager::GetTLSIORegionEndAddress() const {
+    return tls_io_region_end;
+}
+
+u64 VMManager::GetTLSIORegionSize() const {
+    return tls_io_region_end - tls_io_region_base;
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 98bd04bea..015559a64 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -12,6 +12,10 @@
 #include "core/memory.h"
 #include "core/memory_hook.h"
 
+namespace FileSys {
+enum class ProgramAddressSpaceType : u8;
+}
+
 namespace Kernel {
 
 enum class VMAType : u8 {
@@ -81,7 +85,7 @@ struct VirtualMemoryArea {
     /// Memory block backing this VMA.
     std::shared_ptr<std::vector<u8>> backing_block = nullptr;
     /// Offset into the backing_memory the mapping starts from.
-    size_t offset = 0;
+    std::size_t offset = 0;
 
     // Settings for type = BackingMemory
     /// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed.
@@ -111,12 +115,6 @@ struct VirtualMemoryArea {
 class VMManager final {
 public:
     /**
-     * The maximum amount of address space managed by the kernel.
-     * @todo This was selected arbitrarily, and should be verified for Switch OS.
-     */
-    static constexpr VAddr MAX_ADDRESS{0x1000000000ULL};
-
-    /**
      * A map covering the entirety of the managed address space, keyed by the `base` field of each
      * VMA. It must always be modified by splitting or merging VMAs, so that the invariant
      * `elem.base + elem.size == next.base` is preserved, and mergeable regions must always be
@@ -130,7 +128,7 @@ public:
     ~VMManager();
 
     /// Clears the address space map, re-initializing with a single free area.
-    void Reset();
+    void Reset(FileSys::ProgramAddressSpaceType type);
 
     /// Finds the VMA in which the given address is included in, or `vma_map.end()`.
     VMAHandle FindVMA(VAddr target) const;
@@ -147,7 +145,7 @@ public:
      * @param state MemoryState tag to attach to the VMA.
      */
     ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
-                                        size_t offset, u64 size, MemoryState state);
+                                        std::size_t offset, u64 size, MemoryState state);
 
     /**
      * Maps an unmanaged host memory pointer at a given address.
@@ -195,12 +193,63 @@ public:
     /// Gets the total heap usage, used by svcGetInfo
     u64 GetTotalHeapUsage() const;
 
-    /// Gets the total address space base address, used by svcGetInfo
-    VAddr GetAddressSpaceBaseAddr() const;
+    /// Gets the address space base address
+    VAddr GetAddressSpaceBaseAddress() const;
 
-    /// Gets the total address space address size, used by svcGetInfo
+    /// Gets the address space end address
+    VAddr GetAddressSpaceEndAddress() const;
+
+    /// Gets the total size of the address space in bytes.
     u64 GetAddressSpaceSize() const;
 
+    /// Gets the address space width in bits.
+    u64 GetAddressSpaceWidth() const;
+
+    /// Gets the base address of the code region.
+    VAddr GetCodeRegionBaseAddress() const;
+
+    /// Gets the end address of the code region.
+    VAddr GetCodeRegionEndAddress() const;
+
+    /// Gets the total size of the code region in bytes.
+    u64 GetCodeRegionSize() const;
+
+    /// Gets the base address of the heap region.
+    VAddr GetHeapRegionBaseAddress() const;
+
+    /// Gets the end address of the heap region.
+    VAddr GetHeapRegionEndAddress() const;
+
+    /// Gets the total size of the heap region in bytes.
+    u64 GetHeapRegionSize() const;
+
+    /// Gets the base address of the map region.
+    VAddr GetMapRegionBaseAddress() const;
+
+    /// Gets the end address of the map region.
+    VAddr GetMapRegionEndAddress() const;
+
+    /// Gets the total size of the map region in bytes.
+    u64 GetMapRegionSize() const;
+
+    /// Gets the base address of the new map region.
+    VAddr GetNewMapRegionBaseAddress() const;
+
+    /// Gets the end address of the new map region.
+    VAddr GetNewMapRegionEndAddress() const;
+
+    /// Gets the total size of the new map region in bytes.
+    u64 GetNewMapRegionSize() const;
+
+    /// Gets the base address of the TLS IO region.
+    VAddr GetTLSIORegionBaseAddress() const;
+
+    /// Gets the end address of the TLS IO region.
+    VAddr GetTLSIORegionEndAddress() const;
+
+    /// Gets the total size of the TLS IO region in bytes.
+    u64 GetTLSIORegionSize() const;
+
     /// Each VMManager has its own page table, which is set as the main one when the owning process
     /// is scheduled.
     Memory::PageTable page_table;
@@ -240,5 +289,36 @@ private:
 
     /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
     void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+
+    /// Initializes memory region ranges to adhere to a given address space type.
+    void InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type);
+
+    /// Clears the underlying map and page table.
+    void Clear();
+
+    /// Clears out the VMA map, unmapping any previously mapped ranges.
+    void ClearVMAMap();
+
+    /// Clears out the page table.
+    void ClearPageTable();
+
+    u32 address_space_width = 0;
+    VAddr address_space_base = 0;
+    VAddr address_space_end = 0;
+
+    VAddr code_region_base = 0;
+    VAddr code_region_end = 0;
+
+    VAddr heap_region_base = 0;
+    VAddr heap_region_end = 0;
+
+    VAddr map_region_base = 0;
+    VAddr map_region_end = 0;
+
+    VAddr new_map_region_base = 0;
+    VAddr new_map_region_end = 0;
+
+    VAddr tls_io_region_base = 0;
+    VAddr tls_io_region_end = 0;
 };
 } // namespace Kernel
diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp
index eef00b729..b190ceb98 100644
--- a/src/core/hle/kernel/wait_object.cpp
+++ b/src/core/hle/kernel/wait_object.cpp
@@ -81,7 +81,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr<Thread> thread) {
         }
     }
 
-    size_t index = thread->GetWaitObjectIndex(this);
+    std::size_t index = thread->GetWaitObjectIndex(this);
 
     for (auto& object : thread->wait_objects)
         object->RemoveWaitingThread(thread.get());
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h
index 0bd97133c..f4367ee28 100644
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -69,7 +69,7 @@ private:
 template <>
 inline SharedPtr<WaitObject> DynamicObjectCast<WaitObject>(SharedPtr<Object> object) {
     if (object != nullptr && object->IsWaitable()) {
-        return boost::static_pointer_cast<WaitObject>(std::move(object));
+        return boost::static_pointer_cast<WaitObject>(object);
     }
     return nullptr;
 }
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 1502dbf55..e61748ca3 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -34,7 +34,7 @@ public:
         static const FunctionInfo functions[] = {
             {0, &IProfile::Get, "Get"},
             {1, &IProfile::GetBase, "GetBase"},
-            {10, nullptr, "GetImageSize"},
+            {10, &IProfile::GetImageSize, "GetImageSize"},
             {11, &IProfile::LoadImage, "LoadImage"},
         };
         RegisterHandlers(functions);
@@ -93,6 +93,14 @@ private:
         rb.Push<u32>(jpeg_size);
     }
 
+    void GetImageSize(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+        constexpr u32 jpeg_size = 107;
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(jpeg_size);
+    }
+
     const ProfileManager& profile_manager;
     UUID user_id; ///< The user id this profile refers to.
 };
@@ -122,11 +130,10 @@ private:
 
     void GetAccountId(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_ACC, "(STUBBED) called");
-        // TODO(Subv): Find out what this actually does and implement it. Stub it as an error for
-        // now since we do not implement NNID. Returning a bogus id here will cause games to send
-        // invalid IPC requests after ListOpenUsers is called.
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(ResultCode(-1));
+        // Should return a Nintendo account ID
+        IPC::ResponseBuilder rb{ctx, 4};
+        rb.Push(RESULT_SUCCESS);
+        rb.PushRaw<u64>(1);
     }
 };
 
diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp
index 4ccebef23..bcb3475db 100644
--- a/src/core/hle/service/acc/profile_manager.cpp
+++ b/src/core/hle/service/acc/profile_manager.cpp
@@ -25,7 +25,7 @@ const UUID& UUID::Generate() {
 ProfileManager::ProfileManager() {
     // TODO(ogniK): Create the default user we have for now until loading/saving users is added
     auto user_uuid = UUID{1, 0};
-    CreateNewUser(user_uuid, Settings::values.username);
+    ASSERT(CreateNewUser(user_uuid, Settings::values.username).IsSuccess());
     OpenUser(user_uuid);
 }
 
@@ -33,7 +33,7 @@ ProfileManager::~ProfileManager() = default;
 
 /// After a users creation it needs to be "registered" to the system. AddToProfiles handles the
 /// internal management of the users profiles
-boost::optional<size_t> ProfileManager::AddToProfiles(const ProfileInfo& user) {
+boost::optional<std::size_t> ProfileManager::AddToProfiles(const ProfileInfo& user) {
     if (user_count >= MAX_USERS) {
         return boost::none;
     }
@@ -42,7 +42,7 @@ boost::optional<size_t> ProfileManager::AddToProfiles(const ProfileInfo& user) {
 }
 
 /// Deletes a specific profile based on it's profile index
-bool ProfileManager::RemoveProfileAtIndex(size_t index) {
+bool ProfileManager::RemoveProfileAtIndex(std::size_t index) {
     if (index >= MAX_USERS || index >= user_count) {
         return false;
     }
@@ -91,7 +91,8 @@ ResultCode ProfileManager::CreateNewUser(UUID uuid, const ProfileUsername& usern
 /// specifically by allowing an std::string for the username. This is required specifically since
 /// we're loading a string straight from the config
 ResultCode ProfileManager::CreateNewUser(UUID uuid, const std::string& username) {
-    ProfileUsername username_output;
+    ProfileUsername username_output{};
+
     if (username.size() > username_output.size()) {
         std::copy_n(username.begin(), username_output.size(), username_output.begin());
     } else {
@@ -101,7 +102,7 @@ ResultCode ProfileManager::CreateNewUser(UUID uuid, const std::string& username)
 }
 
 /// Returns a users profile index based on their user id.
-boost::optional<size_t> ProfileManager::GetUserIndex(const UUID& uuid) const {
+boost::optional<std::size_t> ProfileManager::GetUserIndex(const UUID& uuid) const {
     if (!uuid) {
         return boost::none;
     }
@@ -110,16 +111,17 @@ boost::optional<size_t> ProfileManager::GetUserIndex(const UUID& uuid) const {
     if (iter == profiles.end()) {
         return boost::none;
     }
-    return static_cast<size_t>(std::distance(profiles.begin(), iter));
+    return static_cast<std::size_t>(std::distance(profiles.begin(), iter));
 }
 
 /// Returns a users profile index based on their profile
-boost::optional<size_t> ProfileManager::GetUserIndex(const ProfileInfo& user) const {
+boost::optional<std::size_t> ProfileManager::GetUserIndex(const ProfileInfo& user) const {
     return GetUserIndex(user.user_uuid);
 }
 
 /// Returns the data structure used by the switch when GetProfileBase is called on acc:*
-bool ProfileManager::GetProfileBase(boost::optional<size_t> index, ProfileBase& profile) const {
+bool ProfileManager::GetProfileBase(boost::optional<std::size_t> index,
+                                    ProfileBase& profile) const {
     if (index == boost::none || index >= MAX_USERS) {
         return false;
     }
@@ -143,14 +145,16 @@ bool ProfileManager::GetProfileBase(const ProfileInfo& user, ProfileBase& profil
 
 /// Returns the current user count on the system. We keep a variable which tracks the count so we
 /// don't have to loop the internal profile array every call.
-size_t ProfileManager::GetUserCount() const {
+
+std::size_t ProfileManager::GetUserCount() const {
     return user_count;
 }
 
 /// Lists the current "opened" users on the system. Users are typically not open until they sign
 /// into something or pick a profile. As of right now users should all be open until qlaunch is
 /// booting
-size_t ProfileManager::GetOpenUserCount() const {
+
+std::size_t ProfileManager::GetOpenUserCount() const {
     return std::count_if(profiles.begin(), profiles.end(),
                          [](const ProfileInfo& p) { return p.is_open; });
 }
@@ -206,7 +210,7 @@ UUID ProfileManager::GetLastOpenedUser() const {
 }
 
 /// Return the users profile base and the unknown arbitary data.
-bool ProfileManager::GetProfileBaseAndData(boost::optional<size_t> index, ProfileBase& profile,
+bool ProfileManager::GetProfileBaseAndData(boost::optional<std::size_t> index, ProfileBase& profile,
                                            ProfileData& data) const {
     if (GetProfileBase(index, profile)) {
         data = profiles[index.get()].data;
diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h
index cd8df93a5..bffd4cf4d 100644
--- a/src/core/hle/service/acc/profile_manager.h
+++ b/src/core/hle/service/acc/profile_manager.h
@@ -12,8 +12,8 @@
 #include "core/hle/result.h"
 
 namespace Service::Account {
-constexpr size_t MAX_USERS = 8;
-constexpr size_t MAX_DATA = 128;
+constexpr std::size_t MAX_USERS = 8;
+constexpr std::size_t MAX_DATA = 128;
 constexpr u128 INVALID_UUID{{0, 0}};
 
 struct UUID {
@@ -87,18 +87,18 @@ public:
     ResultCode AddUser(const ProfileInfo& user);
     ResultCode CreateNewUser(UUID uuid, const ProfileUsername& username);
     ResultCode CreateNewUser(UUID uuid, const std::string& username);
-    boost::optional<size_t> GetUserIndex(const UUID& uuid) const;
-    boost::optional<size_t> GetUserIndex(const ProfileInfo& user) const;
-    bool GetProfileBase(boost::optional<size_t> index, ProfileBase& profile) const;
+    boost::optional<std::size_t> GetUserIndex(const UUID& uuid) const;
+    boost::optional<std::size_t> GetUserIndex(const ProfileInfo& user) const;
+    bool GetProfileBase(boost::optional<std::size_t> index, ProfileBase& profile) const;
     bool GetProfileBase(UUID uuid, ProfileBase& profile) const;
     bool GetProfileBase(const ProfileInfo& user, ProfileBase& profile) const;
-    bool GetProfileBaseAndData(boost::optional<size_t> index, ProfileBase& profile,
+    bool GetProfileBaseAndData(boost::optional<std::size_t> index, ProfileBase& profile,
                                ProfileData& data) const;
     bool GetProfileBaseAndData(UUID uuid, ProfileBase& profile, ProfileData& data) const;
     bool GetProfileBaseAndData(const ProfileInfo& user, ProfileBase& profile,
                                ProfileData& data) const;
-    size_t GetUserCount() const;
-    size_t GetOpenUserCount() const;
+    std::size_t GetUserCount() const;
+    std::size_t GetOpenUserCount() const;
     bool UserExists(UUID uuid) const;
     void OpenUser(UUID uuid);
     void CloseUser(UUID uuid);
@@ -110,9 +110,9 @@ public:
 
 private:
     std::array<ProfileInfo, MAX_USERS> profiles{};
-    size_t user_count = 0;
-    boost::optional<size_t> AddToProfiles(const ProfileInfo& profile);
-    bool RemoveProfileAtIndex(size_t index);
+    std::size_t user_count = 0;
+    boost::optional<std::size_t> AddToProfiles(const ProfileInfo& profile);
+    bool RemoveProfileAtIndex(std::size_t index);
     UUID last_opened_user{INVALID_UUID};
 };
 
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index a57ed3042..69bfce1c1 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -20,6 +20,7 @@
 #include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/hle/service/pm/pm.h"
 #include "core/hle/service/set/set.h"
+#include "core/hle/service/vi/vi.h"
 #include "core/settings.h"
 
 namespace Service::AM {
@@ -334,7 +335,7 @@ ICommonStateGetter::ICommonStateGetter() : ServiceFramework("ICommonStateGetter"
         {51, nullptr, "SetVrModeEnabled"},
         {52, nullptr, "SwitchLcdBacklight"},
         {55, nullptr, "IsInControllerFirmwareUpdateSection"},
-        {60, nullptr, "GetDefaultDisplayResolution"},
+        {60, &ICommonStateGetter::GetDefaultDisplayResolution, "GetDefaultDisplayResolution"},
         {61, &ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent,
          "GetDefaultDisplayResolutionChangeEvent"},
         {62, nullptr, "GetHdcpAuthenticationState"},
@@ -393,6 +394,21 @@ void ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent(Kernel::HLEReque
     LOG_WARNING(Service_AM, "(STUBBED) called");
 }
 
+void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 4};
+    rb.Push(RESULT_SUCCESS);
+
+    if (Settings::values.use_docked_mode) {
+        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth));
+        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight));
+    } else {
+        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth));
+        rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight));
+    }
+
+    LOG_DEBUG(Service_AM, "called");
+}
+
 void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) {
     const bool use_docked_mode{Settings::values.use_docked_mode};
     IPC::ResponseBuilder rb{ctx, 3};
@@ -446,7 +462,7 @@ private:
 
         std::memcpy(&buffer[offset], data.data(), data.size());
 
-        IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)};
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
 
         LOG_DEBUG(Service_AM, "called, offset={}", offset);
@@ -456,13 +472,13 @@ private:
         IPC::RequestParser rp{ctx};
 
         const u64 offset{rp.Pop<u64>()};
-        const size_t size{ctx.GetWriteBufferSize()};
+        const std::size_t size{ctx.GetWriteBufferSize()};
 
         ASSERT(offset + size <= buffer.size());
 
         ctx.WriteBuffer(buffer.data() + offset, size);
 
-        IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)};
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
 
         LOG_DEBUG(Service_AM, "called, offset={}", offset);
@@ -552,7 +568,7 @@ private:
         IPC::RequestParser rp{ctx};
         storage_stack.push(rp.PopIpcInterface<AM::IStorage>());
 
-        IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)};
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
 
         LOG_DEBUG(Service_AM, "called");
@@ -600,7 +616,7 @@ void ILibraryAppletCreator::CreateStorage(Kernel::HLERequestContext& ctx) {
     const u64 size{rp.Pop<u64>()};
     std::vector<u8> buffer(size);
 
-    IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 1)};
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
     rb.Push(RESULT_SUCCESS);
     rb.PushIpcInterface<AM::IStorage>(std::move(buffer));
 
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index fd9ae296b..b39b0d838 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -123,6 +123,7 @@ private:
     void GetOperationMode(Kernel::HLERequestContext& ctx);
     void GetPerformanceMode(Kernel::HLERequestContext& ctx);
     void GetBootMode(Kernel::HLERequestContext& ctx);
+    void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx);
 
     Kernel::SharedPtr<Kernel::Event> event;
 };
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 80a002322..ff1edefbb 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -190,7 +190,7 @@ void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
 
     ctx.WriteBuffer(DefaultDevice);
 
-    IPC::ResponseBuilder rb = rp.MakeBuilder(3, 0, 0);
+    IPC::ResponseBuilder rb{ctx, 3};
 
     rb.Push(RESULT_SUCCESS);
     rb.Push<u32>(1); // Amount of audio devices
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index e84c4fa2b..6073f4ecd 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -10,6 +10,7 @@
 #include "common/alignment.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/event.h"
 #include "core/hle/kernel/hle_ipc.h"
@@ -25,7 +26,7 @@ public:
             {0, &IAudioRenderer::GetAudioRendererSampleRate, "GetAudioRendererSampleRate"},
             {1, &IAudioRenderer::GetAudioRendererSampleCount, "GetAudioRendererSampleCount"},
             {2, &IAudioRenderer::GetAudioRendererMixBufferCount, "GetAudioRendererMixBufferCount"},
-            {3, nullptr, "GetAudioRendererState"},
+            {3, &IAudioRenderer::GetAudioRendererState, "GetAudioRendererState"},
             {4, &IAudioRenderer::RequestUpdateAudioRenderer, "RequestUpdateAudioRenderer"},
             {5, &IAudioRenderer::StartAudioRenderer, "StartAudioRenderer"},
             {6, &IAudioRenderer::StopAudioRenderer, "StopAudioRenderer"},
@@ -62,6 +63,13 @@ private:
         LOG_DEBUG(Service_Audio, "called");
     }
 
+    void GetAudioRendererState(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(static_cast<u32>(renderer->GetStreamState()));
+        LOG_DEBUG(Service_Audio, "called");
+    }
+
     void GetAudioRendererMixBufferCount(Kernel::HLERequestContext& ctx) {
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
@@ -137,7 +145,7 @@ private:
         constexpr std::array<char, 15> audio_interface{{"AudioInterface"}};
         ctx.WriteBuffer(audio_interface);
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(3, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u32>(1);
     }
@@ -151,7 +159,7 @@ private:
         auto file_buffer = ctx.ReadBuffer();
         auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
     }
 
@@ -162,7 +170,7 @@ private:
         constexpr std::array<char, 12> audio_interface{{"AudioDevice"}};
         ctx.WriteBuffer(audio_interface);
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(3, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u32>(1);
     }
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 668fef145..fc6067e59 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -61,7 +61,7 @@ private:
 
     bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
                                    std::vector<opus_int16>& output) {
-        size_t raw_output_sz = output.size() * sizeof(opus_int16);
+        std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
         if (sizeof(OpusHeader) > input.size())
             return false;
         OpusHeader hdr{};
@@ -96,7 +96,7 @@ private:
     u32 channel_count;
 };
 
-static size_t WorkerBufferSize(u32 channel_count) {
+static std::size_t WorkerBufferSize(u32 channel_count) {
     ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
     return opus_decoder_get_size(static_cast<int>(channel_count));
 }
@@ -129,7 +129,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
                "Invalid sample rate");
     ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
 
-    size_t worker_sz = WorkerBufferSize(channel_count);
+    std::size_t worker_sz = WorkerBufferSize(channel_count);
     ASSERT_MSG(buffer_sz < worker_sz, "Worker buffer too large");
     std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
         static_cast<OpusDecoder*>(operator new(worker_sz))};
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index b436ce4e6..2f15ac2a6 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -2,8 +2,17 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
+#include <cstring>
+#include <ctime>
+#include <fmt/time.h>
+#include "common/file_util.h"
 #include "common/logging/log.h"
+#include "common/scm_rev.h"
+#include "common/swap.h"
+#include "core/core.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/process.h"
 #include "core/hle/service/fatal/fatal.h"
 #include "core/hle/service/fatal/fatal_p.h"
 #include "core/hle/service/fatal/fatal_u.h"
@@ -15,16 +24,142 @@ Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
 
 Module::Interface::~Interface() = default;
 
+// Raw context block that ThrowFatalWithCpuContext copies out of the request's read buffer.
+struct FatalInfo {
+    std::array<u64_le, 31> registers{}; // TODO(ogniK): See if this actually is registers or
+                                        // not (find a game which has non-zero values)
+    u64_le unk0{};
+    u64_le unk1{};
+    u64_le unk2{};
+    u64_le unk3{};
+    u64_le unk4{};
+    u64_le unk5{};
+    u64_le unk6{};
+    std::array<u64_le, 32> backtrace{};
+    u64_le unk7{};
+    u64_le unk8{};
+    u32_le backtrace_size{}; // Used as the backtrace entry count when a report is printed
+    u32_le unk9{};
+    u32_le unk10{}; // TODO(ogniK): Is this even used or is it just padding?
+};
+static_assert(sizeof(FatalInfo) == 0x250, "FatalInfo is an invalid size");
+
+enum class FatalType : u32 {
+    ErrorReportAndScreen = 0, // Write an error report, then proceed as for ErrorScreen
+    ErrorReport = 1,          // Write an error report only
+    ErrorScreen = 2,          // No screen is emulated; ThrowFatalError asserts instead
+};
+
+// Formats a crash report for a fatal error, logs it, and saves it to a timestamped file in
+// the "crash_logs" directory under the user log path.
+static void GenerateErrorReport(ResultCode error_code, const FatalInfo& info) {
+    const auto title_id = Core::CurrentProcess()->GetTitleID();
+    std::string crash_report =
+        fmt::format("Yuzu {}-{} crash report\n"
+                    "Title ID:                        {:016x}\n"
+                    "Result:                          0x{:X} ({:04}-{:04d})\n"
+                    "\n",
+                    Common::g_scm_branch, Common::g_scm_desc, title_id, error_code.raw,
+                    2000 + static_cast<u32>(error_code.module.Value()),
+                    static_cast<u32>(error_code.description.Value()));
+    if (info.backtrace_size != 0x0) {
+        crash_report += "Registers:\n";
+        // TODO(ogniK): This is just a guess, find a game which actually has non zero values
+        for (std::size_t i = 0; i < info.registers.size(); i++) {
+            crash_report +=
+                fmt::format("    X[{:02d}]:                       {:016x}\n", i, info.registers[i]);
+        }
+        crash_report += fmt::format("    Unknown 0:                   {:016x}\n", info.unk0);
+        crash_report += fmt::format("    Unknown 1:                   {:016x}\n", info.unk1);
+        crash_report += fmt::format("    Unknown 2:                   {:016x}\n", info.unk2);
+        crash_report += fmt::format("    Unknown 3:                   {:016x}\n", info.unk3);
+        crash_report += fmt::format("    Unknown 4:                   {:016x}\n", info.unk4);
+        crash_report += fmt::format("    Unknown 5:                   {:016x}\n", info.unk5);
+        crash_report += fmt::format("    Unknown 6:                   {:016x}\n", info.unk6);
+        crash_report += "\nBacktrace:\n";
+        // info may be copied verbatim from a request buffer; never index past the array's end.
+        for (std::size_t i = 0; i < info.backtrace_size && i < info.backtrace.size(); i++) {
+            crash_report +=
+                fmt::format("    Backtrace[{:02d}]:               {:016x}\n", i, info.backtrace[i]);
+        }
+        crash_report += fmt::format("\nUnknown 7:                       0x{:016x}\n", info.unk7);
+        crash_report += fmt::format("Unknown 8:                       0x{:016x}\n", info.unk8);
+        crash_report += fmt::format("Unknown 9:                       0x{:016x}\n", info.unk9);
+        crash_report += fmt::format("Unknown 10:                      0x{:016x}\n", info.unk10);
+    }
+
+    LOG_ERROR(Service_Fatal, "{}", crash_report);
+
+    const auto crashreport_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "crash_logs";
+    if (!FileUtil::CreateFullPath(crashreport_dir)) {
+        LOG_ERROR(
+            Service_Fatal,
+            "Unable to create crash report directory. Possible log directory permissions issue.");
+        return;
+    }
+
+    const std::time_t t = std::time(nullptr);
+    const std::string crashreport_filename =
+        fmt::format("{}/{:016x}-{:%F-%H%M%S}.log", crashreport_dir, title_id, *std::localtime(&t));
+    auto file = FileUtil::IOFile(crashreport_filename, "wb");
+    if (file.IsOpen()) {
+        file.WriteString(crash_report);
+        LOG_ERROR(Service_Fatal, "Saving error report to {}", crashreport_filename);
+    } else {
+        LOG_ERROR(Service_Fatal, "Failed to save error report to {}", crashreport_filename);
+    }
+}
+
+// Applies the requested fatal policy: writes an error report, trips ASSERT(false), or both.
+static void ThrowFatalError(ResultCode error_code, FatalType fatal_type, const FatalInfo& info) {
+    LOG_ERROR(Service_Fatal, "Threw fatal error type {}", static_cast<u32>(fatal_type));
+    switch (fatal_type) {
+    case FatalType::ErrorReportAndScreen:
+        GenerateErrorReport(error_code, info);
+        [[fallthrough]];
+    case FatalType::ErrorScreen:
+        // There is no fatal:u error screen to show, so we just kill execution instead
+        ASSERT(false);
+        break;
+    case FatalType::ErrorReport: // Generates an error report without a fatal screen
+        GenerateErrorReport(error_code, info);
+        break;
+    }
+}
+
+// ThrowFatal handler: pops a result code and raises it with the ErrorScreen policy.
+void Module::Interface::ThrowFatal(Kernel::HLERequestContext& ctx) {
+    LOG_ERROR(Service_Fatal, "called");
+    IPC::RequestParser parser{ctx};
+    const auto result = parser.Pop<ResultCode>();
+    ThrowFatalError(result, FatalType::ErrorScreen, {});
+    IPC::ResponseBuilder response{ctx, 2};
+    response.Push(RESULT_SUCCESS);
+}
+
 void Module::Interface::ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx) {
+    LOG_ERROR(Service_Fatal, "called");
     IPC::RequestParser rp(ctx);
-    u32 error_code = rp.Pop<u32>();
-    LOG_WARNING(Service_Fatal, "(STUBBED) called, error_code=0x{:X}", error_code);
+    auto error_code = rp.Pop<ResultCode>();
+    auto fatal_type = rp.PopEnum<FatalType>();
+
+    ThrowFatalError(error_code, fatal_type, {}); // No info is passed with ThrowFatalWithPolicy
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
 
 void Module::Interface::ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_Fatal, "(STUBBED) called");
+    LOG_ERROR(Service_Fatal, "called");
+    IPC::RequestParser rp(ctx);
+    auto error_code = rp.Pop<ResultCode>();
+    auto fatal_type = rp.PopEnum<FatalType>();
+    auto fatal_info = ctx.ReadBuffer();
+    FatalInfo info{};
+
+    ASSERT_MSG(fatal_info.size() == sizeof(FatalInfo), "Invalid fatal info buffer size!");
+    std::memcpy(&info, fatal_info.data(), sizeof(FatalInfo));
+
+    ThrowFatalError(error_code, fatal_type, info);
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
 }
diff --git a/src/core/hle/service/fatal/fatal.h b/src/core/hle/service/fatal/fatal.h
index 4d9a5be52..09371ff7f 100644
--- a/src/core/hle/service/fatal/fatal.h
+++ b/src/core/hle/service/fatal/fatal.h
@@ -15,6 +15,7 @@ public:
         explicit Interface(std::shared_ptr<Module> module, const char* name);
         ~Interface() override;
 
+        void ThrowFatal(Kernel::HLERequestContext& ctx);
         void ThrowFatalWithPolicy(Kernel::HLERequestContext& ctx);
         void ThrowFatalWithCpuContext(Kernel::HLERequestContext& ctx);
 
diff --git a/src/core/hle/service/fatal/fatal_u.cpp b/src/core/hle/service/fatal/fatal_u.cpp
index befc307cf..1572a2051 100644
--- a/src/core/hle/service/fatal/fatal_u.cpp
+++ b/src/core/hle/service/fatal/fatal_u.cpp
@@ -8,7 +8,7 @@ namespace Service::Fatal {
 
 Fatal_U::Fatal_U(std::shared_ptr<Module> module) : Module::Interface(std::move(module), "fatal:u") {
     static const FunctionInfo functions[] = {
-        {0, nullptr, "ThrowFatal"},
+        {0, &Fatal_U::ThrowFatal, "ThrowFatal"},
         {1, &Fatal_U::ThrowFatalWithPolicy, "ThrowFatalWithPolicy"},
         {2, &Fatal_U::ThrowFatalWithCpuContext, "ThrowFatalWithCpuContext"},
     };
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 5c4971724..aed2abb71 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -197,7 +197,7 @@ ResultVal<FileSys::VirtualDir> VfsDirectoryServiceWrapper::OpenDirectory(const s
     auto dir = GetDirectoryRelativeWrapped(backing, path);
     if (dir == nullptr) {
         // TODO(DarkLordZach): Find a better error code for this
-        return ResultCode(-1);
+        return FileSys::ERROR_PATH_NOT_FOUND;
     }
     return MakeResult(dir);
 }
@@ -343,6 +343,15 @@ std::shared_ptr<FileSys::RegisteredCache> GetSDMCContents() {
     return sdmc_factory->GetSDMCContents();
 }
 
+// Resolves the mod load root for title_id through the BIS factory; nullptr if none exists.
+FileSys::VirtualDir GetModificationLoadRoot(u64 title_id) {
+    LOG_TRACE(Service_FS, "Opening mod load root for tid={:016X}", title_id);
+
+    if (!bis_factory) {
+        return nullptr;
+    }
+    return bis_factory->GetModificationLoadRoot(title_id);
+}
+
 void CreateFactories(const FileSys::VirtualFilesystem& vfs, bool overwrite) {
     if (overwrite) {
         bis_factory = nullptr;
@@ -354,9 +363,11 @@ void CreateFactories(const FileSys::VirtualFilesystem& vfs, bool overwrite) {
                                              FileSys::Mode::ReadWrite);
     auto sd_directory = vfs->OpenDirectory(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir),
                                            FileSys::Mode::ReadWrite);
+    auto load_directory = vfs->OpenDirectory(FileUtil::GetUserPath(FileUtil::UserPath::LoadDir),
+                                             FileSys::Mode::ReadWrite);
 
     if (bis_factory == nullptr)
-        bis_factory = std::make_unique<FileSys::BISFactory>(nand_directory);
+        bis_factory = std::make_unique<FileSys::BISFactory>(nand_directory, load_directory);
     if (save_data_factory == nullptr)
         save_data_factory = std::make_unique<FileSys::SaveDataFactory>(std::move(nand_directory));
     if (sdmc_factory == nullptr)
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index aab65a2b8..7039a2247 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -52,6 +52,8 @@ std::shared_ptr<FileSys::RegisteredCache> GetSystemNANDContents();
 std::shared_ptr<FileSys::RegisteredCache> GetUserNANDContents();
 std::shared_ptr<FileSys::RegisteredCache> GetSDMCContents();
 
+FileSys::VirtualDir GetModificationLoadRoot(u64 title_id);
+
 // Creates the SaveData, SDMC, and BIS Factories. Should be called once and before any function
 // above is called.
 void CreateFactories(const FileSys::VirtualFilesystem& vfs, bool overwrite = true);
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index a8e0c869f..7c6b0a4e6 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -89,7 +89,7 @@ private:
         controller_header.left_color_body = JOYCON_BODY_NEON_BLUE;
         controller_header.left_color_buttons = JOYCON_BUTTONS_NEON_BLUE;
 
-        for (size_t controller = 0; controller < mem.controllers.size(); controller++) {
+        for (std::size_t controller = 0; controller < mem.controllers.size(); controller++) {
             for (auto& layout : mem.controllers[controller].layouts) {
                 layout.header.num_entries = HID_NUM_ENTRIES;
                 layout.header.max_entry_index = HID_NUM_ENTRIES - 1;
@@ -313,7 +313,7 @@ public:
             {64, nullptr, "DeactivateJoySixAxisSensor"},
             {65, nullptr, "GetJoySixAxisSensorLifoHandle"},
             {66, &Hid::StartSixAxisSensor, "StartSixAxisSensor"},
-            {67, nullptr, "StopSixAxisSensor"},
+            {67, &Hid::StopSixAxisSensor, "StopSixAxisSensor"},
             {68, nullptr, "IsSixAxisSensorFusionEnabled"},
             {69, nullptr, "EnableSixAxisSensorFusion"},
             {70, nullptr, "SetSixAxisSensorFusionParameters"},
@@ -329,7 +329,7 @@ public:
             {80, nullptr, "GetGyroscopeZeroDriftMode"},
             {81, nullptr, "ResetGyroscopeZeroDriftMode"},
             {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"},
-            {91, nullptr, "ActivateGesture"},
+            {91, &Hid::ActivateGesture, "ActivateGesture"},
             {100, &Hid::SetSupportedNpadStyleSet, "SetSupportedNpadStyleSet"},
             {101, &Hid::GetSupportedNpadStyleSet, "GetSupportedNpadStyleSet"},
             {102, &Hid::SetSupportedNpadIdType, "SetSupportedNpadIdType"},
@@ -338,7 +338,7 @@ public:
             {106, &Hid::AcquireNpadStyleSetUpdateEventHandle, "AcquireNpadStyleSetUpdateEventHandle"},
             {107, &Hid::DisconnectNpad, "DisconnectNpad"},
             {108, &Hid::GetPlayerLedPattern, "GetPlayerLedPattern"},
-            {109, nullptr, "ActivateNpadWithRevision"},
+            {109, &Hid::ActivateNpadWithRevision, "ActivateNpadWithRevision"},
             {120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"},
             {121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"},
             {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, "SetNpadJoyAssignmentModeSingleByDefault"},
@@ -364,8 +364,8 @@ public:
             {208, nullptr, "GetActualVibrationGcErmCommand"},
             {209, nullptr, "BeginPermitVibrationSession"},
             {210, nullptr, "EndPermitVibrationSession"},
-            {300, nullptr, "ActivateConsoleSixAxisSensor"},
-            {301, nullptr, "StartConsoleSixAxisSensor"},
+            {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"},
+            {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"},
             {302, nullptr, "StopConsoleSixAxisSensor"},
             {303, nullptr, "ActivateSevenSixAxisSensor"},
             {304, nullptr, "StartSevenSixAxisSensor"},
@@ -579,6 +579,36 @@ private:
         rb.Push(RESULT_SUCCESS);
         LOG_WARNING(Service_HID, "(STUBBED) called");
     }
+
+    void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_WARNING(Service_HID, "(STUBBED) called");
+    }
+
+    void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_WARNING(Service_HID, "(STUBBED) called");
+    }
+
+    void StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_WARNING(Service_HID, "(STUBBED) called");
+    }
+
+    void ActivateGesture(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_WARNING(Service_HID, "(STUBBED) called");
+    }
+
+    void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_WARNING(Service_HID, "(STUBBED) called");
+    }
 };
 
 class HidDbg final : public ServiceFramework<HidDbg> {
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index e587ad0d8..872e3c344 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -2,6 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/swap.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/shared_memory.h"
 #include "core/hle/service/hid/irs.h"
 
 namespace Service::HID {
@@ -9,28 +14,145 @@ namespace Service::HID {
 IRS::IRS() : ServiceFramework{"irs"} {
     // clang-format off
     static const FunctionInfo functions[] = {
-        {302, nullptr, "ActivateIrsensor"},
-        {303, nullptr, "DeactivateIrsensor"},
-        {304, nullptr, "GetIrsensorSharedMemoryHandle"},
-        {305, nullptr, "StopImageProcessor"},
-        {306, nullptr, "RunMomentProcessor"},
-        {307, nullptr, "RunClusteringProcessor"},
-        {308, nullptr, "RunImageTransferProcessor"},
-        {309, nullptr, "GetImageTransferProcessorState"},
-        {310, nullptr, "RunTeraPluginProcessor"},
-        {311, nullptr, "GetNpadIrCameraHandle"},
-        {312, nullptr, "RunPointingProcessor"},
-        {313, nullptr, "SuspendImageProcessor"},
-        {314, nullptr, "CheckFirmwareVersion"},
-        {315, nullptr, "SetFunctionLevel"},
-        {316, nullptr, "RunImageTransferExProcessor"},
-        {317, nullptr, "RunIrLedProcessor"},
-        {318, nullptr, "StopImageProcessorAsync"},
-        {319, nullptr, "ActivateIrsensorWithFunctionLevel"},
+        {302, &IRS::ActivateIrsensor, "ActivateIrsensor"},
+        {303, &IRS::DeactivateIrsensor, "DeactivateIrsensor"},
+        {304, &IRS::GetIrsensorSharedMemoryHandle, "GetIrsensorSharedMemoryHandle"},
+        {305, &IRS::StopImageProcessor, "StopImageProcessor"},
+        {306, &IRS::RunMomentProcessor, "RunMomentProcessor"},
+        {307, &IRS::RunClusteringProcessor, "RunClusteringProcessor"},
+        {308, &IRS::RunImageTransferProcessor, "RunImageTransferProcessor"},
+        {309, &IRS::GetImageTransferProcessorState, "GetImageTransferProcessorState"},
+        {310, &IRS::RunTeraPluginProcessor, "RunTeraPluginProcessor"},
+        {311, &IRS::GetNpadIrCameraHandle, "GetNpadIrCameraHandle"},
+        {312, &IRS::RunPointingProcessor, "RunPointingProcessor"},
+        {313, &IRS::SuspendImageProcessor, "SuspendImageProcessor"},
+        {314, &IRS::CheckFirmwareVersion, "CheckFirmwareVersion"},
+        {315, &IRS::SetFunctionLevel, "SetFunctionLevel"},
+        {316, &IRS::RunImageTransferExProcessor, "RunImageTransferExProcessor"},
+        {317, &IRS::RunIrLedProcessor, "RunIrLedProcessor"},
+        {318, &IRS::StopImageProcessorAsync, "StopImageProcessorAsync"},
+        {319, &IRS::ActivateIrsensorWithFunctionLevel, "ActivateIrsensorWithFunctionLevel"},
     };
     // clang-format on
 
     RegisterHandlers(functions);
+
+    auto& kernel = Core::System::GetInstance().Kernel();
+    shared_mem = Kernel::SharedMemory::Create(
+        kernel, nullptr, 0x8000, Kernel::MemoryPermission::ReadWrite,
+        Kernel::MemoryPermission::Read, 0, Kernel::MemoryRegion::BASE, "IRS:SharedMemory");
+}
+
+void IRS::ActivateIrsensor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::DeactivateIrsensor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::GetIrsensorSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2, 1};
+    rb.Push(RESULT_SUCCESS);
+    rb.PushCopyObjects(shared_mem);
+    LOG_DEBUG(Service_IRS, "called");
+}
+
+void IRS::StopImageProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::RunMomentProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::RunClusteringProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::RunImageTransferProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 5};
+    rb.Push(RESULT_SUCCESS);
+    rb.PushRaw<u64>(CoreTiming::GetTicks());
+    rb.PushRaw<u32>(0);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::RunTeraPluginProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::GetNpadIrCameraHandle(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(RESULT_SUCCESS);
+    rb.PushRaw<u32>(device_handle);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::RunPointingProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::SuspendImageProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::CheckFirmwareVersion(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::SetFunctionLevel(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::RunImageTransferExProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::RunIrLedProcessor(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::StopImageProcessorAsync(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
+}
+
+void IRS::ActivateIrsensorWithFunctionLevel(Kernel::HLERequestContext& ctx) {
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    LOG_WARNING(Service_IRS, "(STUBBED) called");
 }
 
 IRS::~IRS() = default;
diff --git a/src/core/hle/service/hid/irs.h b/src/core/hle/service/hid/irs.h
index 6fb16b45d..12de6bfb3 100644
--- a/src/core/hle/service/hid/irs.h
+++ b/src/core/hle/service/hid/irs.h
@@ -4,14 +4,41 @@
 
 #pragma once
 
+#include "core/hle/kernel/object.h"
 #include "core/hle/service/service.h"
 
+namespace Kernel {
+class SharedMemory;
+}
+
 namespace Service::HID {
 
 class IRS final : public ServiceFramework<IRS> {
 public:
     explicit IRS();
     ~IRS() override;
+
+private:
+    void ActivateIrsensor(Kernel::HLERequestContext& ctx);
+    void DeactivateIrsensor(Kernel::HLERequestContext& ctx);
+    void GetIrsensorSharedMemoryHandle(Kernel::HLERequestContext& ctx);
+    void StopImageProcessor(Kernel::HLERequestContext& ctx);
+    void RunMomentProcessor(Kernel::HLERequestContext& ctx);
+    void RunClusteringProcessor(Kernel::HLERequestContext& ctx);
+    void RunImageTransferProcessor(Kernel::HLERequestContext& ctx);
+    void GetImageTransferProcessorState(Kernel::HLERequestContext& ctx);
+    void RunTeraPluginProcessor(Kernel::HLERequestContext& ctx);
+    void GetNpadIrCameraHandle(Kernel::HLERequestContext& ctx);
+    void RunPointingProcessor(Kernel::HLERequestContext& ctx);
+    void SuspendImageProcessor(Kernel::HLERequestContext& ctx);
+    void CheckFirmwareVersion(Kernel::HLERequestContext& ctx);
+    void SetFunctionLevel(Kernel::HLERequestContext& ctx);
+    void RunImageTransferExProcessor(Kernel::HLERequestContext& ctx);
+    void RunIrLedProcessor(Kernel::HLERequestContext& ctx);
+    void StopImageProcessorAsync(Kernel::HLERequestContext& ctx);
+    void ActivateIrsensorWithFunctionLevel(Kernel::HLERequestContext& ctx);
+    Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
+    const u32 device_handle{0xABCD};
 };
 
 class IRS_SYS final : public ServiceFramework<IRS_SYS> {
diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp
index 098da2a41..c89157a4d 100644
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -99,7 +99,7 @@ private:
         std::string thread;
         while (addr < end_addr) {
             const Field field{static_cast<Field>(Memory::Read8(addr++))};
-            const size_t length{Memory::Read8(addr++)};
+            const std::size_t length{Memory::Read8(addr++)};
 
             if (static_cast<Field>(Memory::Read8(addr)) == Field::Skip) {
                 ++addr;
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index f8d2127d9..8c07a05c2 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/event.h"
 #include "core/hle/service/hid/hid.h"
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index ed4f5f539..10611ed6a 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -31,7 +31,7 @@ public:
             {1, &IRequest::GetResult, "GetResult"},
             {2, &IRequest::GetSystemEventReadableHandles, "GetSystemEventReadableHandles"},
             {3, &IRequest::Cancel, "Cancel"},
-            {4, nullptr, "Submit"},
+            {4, &IRequest::Submit, "Submit"},
             {5, nullptr, "SetRequirement"},
             {6, nullptr, "SetRequirementPreset"},
             {8, nullptr, "SetPriority"},
@@ -61,6 +61,12 @@ public:
     }
 
 private:
+    void Submit(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_NIFM, "(STUBBED) called");
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
+
     void GetRequestState(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_NIFM, "(STUBBED) called");
         IPC::ResponseBuilder rb{ctx, 3};
@@ -114,10 +120,11 @@ public:
 
 private:
     void GetClientId(Kernel::HLERequestContext& ctx) {
+        static constexpr u32 client_id = 1;
         LOG_WARNING(Service_NIFM, "(STUBBED) called");
         IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
-        rb.Push<u64>(0);
+        rb.Push<u64>(client_id); // Client ID needs to be non zero otherwise it's considered invalid
     }
     void CreateScanRequest(Kernel::HLERequestContext& ctx) {
         IPC::ResponseBuilder rb{ctx, 2, 0, 1};
@@ -141,10 +148,16 @@ private:
         rb.Push(RESULT_SUCCESS);
     }
     void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) {
-        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+        ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, "NetworkProfileData is not the correct size");
+        u128 uuid{};
+        auto buffer = ctx.ReadBuffer();
+        std::memcpy(&uuid, buffer.data() + 8, sizeof(u128));
+
+        IPC::ResponseBuilder rb{ctx, 6, 0, 1};
 
         rb.Push(RESULT_SUCCESS);
         rb.PushIpcInterface<INetworkProfile>();
+        rb.PushRaw<u128>(uuid);
 
         LOG_DEBUG(Service_NIFM, "called");
     }
diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp
index bd05b0a70..261ad539c 100644
--- a/src/core/hle/service/nim/nim.cpp
+++ b/src/core/hle/service/nim/nim.cpp
@@ -2,6 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <chrono>
+#include <ctime>
+#include "core/core.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/event.h"
 #include "core/hle/service/nim/nim.h"
 #include "core/hle/service/service.h"
 #include "core/hle/service/sm/sm.h"
@@ -100,19 +105,111 @@ public:
     }
 };
 
+class IEnsureNetworkClockAvailabilityService final // Emulated task that completes immediately
+    : public ServiceFramework<IEnsureNetworkClockAvailabilityService> {
+public:
+    IEnsureNetworkClockAvailabilityService()
+        : ServiceFramework("IEnsureNetworkClockAvailabilityService") {
+        static const FunctionInfo functions[] = {
+            {0, &IEnsureNetworkClockAvailabilityService::StartTask, "StartTask"},
+            {1, &IEnsureNetworkClockAvailabilityService::GetFinishNotificationEvent,
+             "GetFinishNotificationEvent"},
+            {2, &IEnsureNetworkClockAvailabilityService::GetResult, "GetResult"},
+            {3, &IEnsureNetworkClockAvailabilityService::Cancel, "Cancel"},
+            {4, &IEnsureNetworkClockAvailabilityService::IsProcessing, "IsProcessing"},
+            {5, &IEnsureNetworkClockAvailabilityService::GetServerTime, "GetServerTime"},
+        };
+        RegisterHandlers(functions);
+
+        auto& kernel = Core::System::GetInstance().Kernel();
+        finished_event =
+            Kernel::Event::Create(kernel, Kernel::ResetType::OneShot,
+                                  "IEnsureNetworkClockAvailabilityService:FinishEvent");
+    }
+
+private:
+    Kernel::SharedPtr<Kernel::Event> finished_event; // Signalled by StartTask, cleared by Cancel
+
+    void StartTask(Kernel::HLERequestContext& ctx) {
+        // No need to connect to the internet, just finish the task straight away.
+        finished_event->Signal();
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_DEBUG(Service_NIM, "called");
+    }
+
+    void GetFinishNotificationEvent(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2, 1};
+        rb.Push(RESULT_SUCCESS);
+        rb.PushCopyObjects(finished_event); // Same event object that StartTask signals
+        LOG_DEBUG(Service_NIM, "called");
+    }
+
+    void GetResult(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS); // The emulated task always reports success
+        LOG_DEBUG(Service_NIM, "called");
+    }
+
+    void Cancel(Kernel::HLERequestContext& ctx) {
+        finished_event->Clear();
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_DEBUG(Service_NIM, "called");
+    }
+
+    void IsProcessing(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.PushRaw<u32>(0); // Always 0: StartTask processes the request instantly
+        LOG_DEBUG(Service_NIM, "called");
+    }
+
+    void GetServerTime(Kernel::HLERequestContext& ctx) {
+        const s64 server_time{std::chrono::duration_cast<std::chrono::seconds>(
+                                  std::chrono::system_clock::now().time_since_epoch())
+                                  .count()}; // Host system_clock time in whole seconds
+        IPC::ResponseBuilder rb{ctx, 4};
+        rb.Push(RESULT_SUCCESS);
+        rb.PushRaw<s64>(server_time);
+        LOG_DEBUG(Service_NIM, "called");
+    }
+};
+
 class NTC final : public ServiceFramework<NTC> {
 public:
     explicit NTC() : ServiceFramework{"ntc"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "OpenEnsureNetworkClockAvailabilityService"},
-            {100, nullptr, "SuspendAutonomicTimeCorrection"},
-            {101, nullptr, "ResumeAutonomicTimeCorrection"},
+            {0, &NTC::OpenEnsureNetworkClockAvailabilityService, "OpenEnsureNetworkClockAvailabilityService"},
+            {100, &NTC::SuspendAutonomicTimeCorrection, "SuspendAutonomicTimeCorrection"},
+            {101, &NTC::ResumeAutonomicTimeCorrection, "ResumeAutonomicTimeCorrection"},
         };
         // clang-format on
 
         RegisterHandlers(functions);
     }
+
+private:
+    void OpenEnsureNetworkClockAvailabilityService(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2, 0, 1}; // Result code plus one interface object
+        rb.Push(RESULT_SUCCESS);
+        rb.PushIpcInterface<IEnsureNetworkClockAvailabilityService>();
+        LOG_DEBUG(Service_NIM, "called");
+    }
+
+    // TODO(ogniK): Do we need these? Both handlers below are stubs that only report success.
+    void SuspendAutonomicTimeCorrection(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_WARNING(Service_NIM, "(STUBBED) called");
+    }
+
+    void ResumeAutonomicTimeCorrection(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+        LOG_WARNING(Service_NIM, "(STUBBED) called");
+    }
 };
 
 void InstallInterfaces(SM::ServiceManager& sm) {
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index 447689a1a..4b2f758a8 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -78,7 +78,7 @@ enum class LoadState : u32 {
 };
 
 static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output,
-                              size_t& offset) {
+                              std::size_t& offset) {
     ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
                "Shared fonts exceeds 17mb!");
     ASSERT_MSG(input[0] == EXPECTED_MAGIC, "Failed to derive key, unexpected magic number");
@@ -95,7 +95,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou
 }
 
 static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output,
-                              size_t& offset) {
+                              std::size_t& offset) {
     ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!");
     const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT;
     std::memcpy(output.data() + offset, &EXPECTED_RESULT, sizeof(u32)); // Magic header
@@ -113,7 +113,7 @@ static u32 GetU32Swapped(const u8* data) {
 }
 
 struct PL_U::Impl {
-    const FontRegion& GetSharedFontRegion(size_t index) const {
+    const FontRegion& GetSharedFontRegion(std::size_t index) const {
         if (index >= shared_font_regions.size() || shared_font_regions.empty()) {
             // No font fallback
             return EMPTY_REGION;
@@ -126,7 +126,7 @@ struct PL_U::Impl {
         // based on the shared memory dump
         unsigned cur_offset = 0;
 
-        for (size_t i = 0; i < SHARED_FONTS.size(); i++) {
+        for (std::size_t i = 0; i < SHARED_FONTS.size(); i++) {
             // Out of shared fonts/invalid font
             if (GetU32Swapped(input.data() + cur_offset) != EXPECTED_RESULT) {
                 break;
@@ -162,7 +162,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
     RegisterHandlers(functions);
     // Attempt to load shared font data from disk
     const auto nand = FileSystem::GetSystemNANDContents();
-    size_t offset = 0;
+    std::size_t offset = 0;
     // Rebuild shared fonts from data ncas
     if (nand->HasEntry(static_cast<u64>(FontArchives::Standard),
                        FileSys::ContentRecordType::Data)) {
@@ -317,9 +317,9 @@ void PL_U::GetSharedMemoryAddressOffset(Kernel::HLERequestContext& ctx) {
 
 void PL_U::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) {
     // Map backing memory for the font data
-    Core::CurrentProcess()->vm_manager.MapMemoryBlock(SHARED_FONT_MEM_VADDR, impl->shared_font, 0,
-                                                      SHARED_FONT_MEM_SIZE,
-                                                      Kernel::MemoryState::Shared);
+    Core::CurrentProcess()->VMManager().MapMemoryBlock(SHARED_FONT_MEM_VADDR, impl->shared_font, 0,
+                                                       SHARED_FONT_MEM_SIZE,
+                                                       Kernel::MemoryState::Shared);
 
     // Create shared font memory object
     auto& kernel = Core::System::GetInstance().Kernel();
@@ -344,7 +344,7 @@ void PL_U::GetSharedFontInOrderOfPriority(Kernel::HLERequestContext& ctx) {
     std::vector<u32> font_sizes;
 
     // TODO(ogniK): Have actual priority order
-    for (size_t i = 0; i < impl->shared_font_regions.size(); i++) {
+    for (std::size_t i = 0; i < impl->shared_font_regions.size(); i++) {
         font_codes.push_back(static_cast<u32>(i));
         auto region = impl->GetSharedFontRegion(i);
         font_offsets.push_back(region.offset);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 25d5a93fa..d8b8037a8 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -71,7 +71,7 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
 }
 
 u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) {
-    size_t num_entries = input.size() / sizeof(IoctlRemapEntry);
+    std::size_t num_entries = input.size() / sizeof(IoctlRemapEntry);
 
     LOG_WARNING(Service_NVDRV, "(STUBBED) called, num_entries=0x{:X}", num_entries);
 
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 7455ddd19..d47b6f659 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -23,7 +23,7 @@
 
 namespace Service::NVFlinger {
 
-constexpr size_t SCREEN_REFRESH_RATE = 60;
+constexpr std::size_t SCREEN_REFRESH_RATE = 60;
 constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
 
 NVFlinger::NVFlinger() {
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 9bb7c7b26..62f049660 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -50,6 +50,7 @@
 #include "core/hle/service/nim/nim.h"
 #include "core/hle/service/ns/ns.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
+#include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/hle/service/pcie/pcie.h"
 #include "core/hle/service/pctl/pctl.h"
 #include "core/hle/service/pcv/pcv.h"
@@ -58,7 +59,6 @@
 #include "core/hle/service/psc/psc.h"
 #include "core/hle/service/service.h"
 #include "core/hle/service/set/settings.h"
-#include "core/hle/service/sm/controller.h"
 #include "core/hle/service/sm/sm.h"
 #include "core/hle/service/sockets/sockets.h"
 #include "core/hle/service/spl/module.h"
@@ -129,9 +129,9 @@ Kernel::SharedPtr<Kernel::ClientPort> ServiceFrameworkBase::CreatePort() {
     return client_port;
 }
 
-void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, size_t n) {
+void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) {
     handlers.reserve(handlers.size() + n);
-    for (size_t i = 0; i < n; ++i) {
+    for (std::size_t i = 0; i < n; ++i) {
         // Usually this array is sorted by id already, so hint to insert at the end
         handlers.emplace_hint(handlers.cend(), functions[i].expected_header, functions[i]);
     }
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 7a051523e..2fc57a82e 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -88,7 +88,7 @@ private:
     ServiceFrameworkBase(const char* service_name, u32 max_sessions, InvokerFn* handler_invoker);
     ~ServiceFrameworkBase();
 
-    void RegisterHandlersBase(const FunctionInfoBase* functions, size_t n);
+    void RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n);
     void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info);
 
     /// Identifier string used to connect to the service.
@@ -152,7 +152,7 @@ protected:
         : ServiceFrameworkBase(service_name, max_sessions, Invoker) {}
 
     /// Registers handlers in the service.
-    template <size_t N>
+    template <std::size_t N>
     void RegisterHandlers(const FunctionInfo (&functions)[N]) {
         RegisterHandlers(functions, N);
     }
@@ -161,7 +161,7 @@ protected:
      * Registers handlers in the service. Usually prefer using the other RegisterHandlers
      * overload in order to avoid needing to specify the array size.
      */
-    void RegisterHandlers(const FunctionInfo* functions, size_t n) {
+    void RegisterHandlers(const FunctionInfo* functions, std::size_t n) {
         RegisterHandlersBase(functions, n);
     }
 
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index 59eb20155..9e5af7839 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -32,21 +32,21 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{
     LanguageCode::ZH_HANT,
 }};
 
-constexpr size_t pre4_0_0_max_entries = 0xF;
-constexpr size_t post4_0_0_max_entries = 0x40;
+constexpr std::size_t pre4_0_0_max_entries = 0xF;
+constexpr std::size_t post4_0_0_max_entries = 0x40;
 
-LanguageCode GetLanguageCodeFromIndex(size_t index) {
+LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
     return available_language_codes.at(index);
 }
 
-template <size_t size>
+template <std::size_t size>
 static std::array<LanguageCode, size> MakeLanguageCodeSubset() {
     std::array<LanguageCode, size> arr;
     std::copy_n(available_language_codes.begin(), size, arr.begin());
     return arr;
 }
 
-static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, size_t max_size) {
+static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t max_size) {
     IPC::ResponseBuilder rb{ctx, 3};
     rb.Push(RESULT_SUCCESS);
     if (available_language_codes.size() > max_size)
diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h
index 5f0214359..266f13e46 100644
--- a/src/core/hle/service/set/set.h
+++ b/src/core/hle/service/set/set.h
@@ -28,7 +28,7 @@ enum class LanguageCode : u64 {
     ZH_HANS = 0x00736E61482D687A,
     ZH_HANT = 0x00746E61482D687A,
 };
-LanguageCode GetLanguageCodeFromIndex(size_t idx);
+LanguageCode GetLanguageCodeFromIndex(std::size_t idx);
 
 class SET final : public ServiceFramework<SET> {
 public:
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index cdf328a26..98f6e4111 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -2,8 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
 #include "core/hle/service/sm/controller.h"
 
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 18d1641b8..464e79d01 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -15,6 +15,10 @@
 
 namespace Service::SM {
 
+constexpr ResultCode ERR_ALREADY_REGISTERED(ErrorModule::SM, 4);
+constexpr ResultCode ERR_INVALID_NAME(ErrorModule::SM, 6);
+constexpr ResultCode ERR_SERVICE_NOT_REGISTERED(ErrorModule::SM, 7);
+
 ServiceManager::ServiceManager() = default;
 ServiceManager::~ServiceManager() = default;
 
@@ -24,10 +28,10 @@ void ServiceManager::InvokeControlRequest(Kernel::HLERequestContext& context) {
 
 static ResultCode ValidateServiceName(const std::string& name) {
     if (name.size() <= 0 || name.size() > 8) {
-        return ERR_INVALID_NAME_SIZE;
+        return ERR_INVALID_NAME; // Name is empty or longer than 8 characters
     }
     if (name.find('\0') != std::string::npos) {
-        return ERR_NAME_CONTAINS_NUL;
+        return ERR_INVALID_NAME; // Name contains an embedded NUL character
     }
     return RESULT_SUCCESS;
 }
@@ -104,7 +108,7 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
 
     auto client_port = service_manager->GetServicePort(name);
     if (client_port.Failed()) {
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(client_port.Code());
         LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, client_port.Code().raw);
         if (name.length() == 0)
@@ -117,8 +121,7 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
     ASSERT(session.Succeeded());
     if (session.Succeeded()) {
         LOG_DEBUG(Service_SM, "called service={} -> session={}", name, (*session)->GetObjectId());
-        IPC::ResponseBuilder rb =
-            rp.MakeBuilder(2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles);
+        IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
         rb.Push(session.Code());
         rb.PushMoveObjects(std::move(session).Unwrap());
     }
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index a58d922a0..da2c51082 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -36,12 +36,6 @@ private:
     std::shared_ptr<ServiceManager> service_manager;
 };
 
-constexpr ResultCode ERR_SERVICE_NOT_REGISTERED(-1);
-constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED(-1);
-constexpr ResultCode ERR_INVALID_NAME_SIZE(-1);
-constexpr ResultCode ERR_NAME_CONTAINS_NUL(-1);
-constexpr ResultCode ERR_ALREADY_REGISTERED(-1);
-
 class ServiceManager {
 public:
     static void InstallInterfaces(std::shared_ptr<ServiceManager> self);
diff --git a/src/core/hle/service/spl/module.cpp b/src/core/hle/service/spl/module.cpp
index 0d8441fb1..44a6717d0 100644
--- a/src/core/hle/service/spl/module.cpp
+++ b/src/core/hle/service/spl/module.cpp
@@ -21,7 +21,7 @@ Module::Interface::~Interface() = default;
 void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
 
-    size_t size = ctx.GetWriteBufferSize();
+    std::size_t size = ctx.GetWriteBufferSize();
 
     std::vector<u8> data(size);
     std::generate(data.begin(), data.end(), std::rand);
diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp
index 63b86e099..bc4f7a437 100644
--- a/src/core/hle/service/ssl/ssl.cpp
+++ b/src/core/hle/service/ssl/ssl.cpp
@@ -71,7 +71,7 @@ private:
         LOG_WARNING(Service_SSL, "(STUBBED) called");
         IPC::RequestParser rp{ctx};
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
     }
 
@@ -103,6 +103,7 @@ public:
     }
 
 private:
+    u32 ssl_version{};
     void CreateContext(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_SSL, "(STUBBED) called");
 
@@ -112,10 +113,9 @@ private:
     }
 
     void SetInterfaceVersion(Kernel::HLERequestContext& ctx) {
-        LOG_WARNING(Service_SSL, "(STUBBED) called");
+        LOG_DEBUG(Service_SSL, "called");
         IPC::RequestParser rp{ctx};
-        u32 unk1 = rp.Pop<u32>(); // Probably minor/major?
-        u32 unk2 = rp.Pop<u32>(); // TODO(ogniK): Figure out what this does
+        ssl_version = rp.Pop<u32>();
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 015b42cfd..bbc02abcc 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -4,25 +4,28 @@
 
 #include <algorithm>
 #include <array>
+#include <cstring>
 #include <memory>
 #include <type_traits>
 #include <utility>
 #include <boost/optional.hpp>
 #include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_funcs.h"
+#include "common/logging/log.h"
 #include "common/math_util.h"
-#include "common/scope_exit.h"
+#include "common/swap.h"
 #include "core/core_timing.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/event.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
+#include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_m.h"
 #include "core/hle/service/vi/vi_s.h"
 #include "core/hle/service/vi/vi_u.h"
 #include "core/settings.h"
-#include "video_core/renderer_base.h"
-#include "video_core/video_core.h"
 
 namespace Service::VI {
 
@@ -38,7 +41,7 @@ static_assert(sizeof(DisplayInfo) == 0x60, "DisplayInfo has wrong size");
 class Parcel {
 public:
     // This default size was chosen arbitrarily.
-    static constexpr size_t DefaultBufferSize = 0x40;
+    static constexpr std::size_t DefaultBufferSize = 0x40;
     Parcel() : buffer(DefaultBufferSize) {}
     explicit Parcel(std::vector<u8> data) : buffer(std::move(data)) {}
     virtual ~Parcel() = default;
@@ -66,7 +69,7 @@ public:
         return val;
     }
 
-    std::vector<u8> ReadBlock(size_t length) {
+    std::vector<u8> ReadBlock(std::size_t length) {
         ASSERT(read_index + length <= buffer.size());
         const u8* const begin = buffer.data() + read_index;
         const u8* const end = begin + length;
@@ -156,8 +159,8 @@ private:
     static_assert(sizeof(Header) == 16, "ParcelHeader has wrong size");
 
     std::vector<u8> buffer;
-    size_t read_index = 0;
-    size_t write_index = 0;
+    std::size_t read_index = 0;
+    std::size_t write_index = 0;
 };
 
 class NativeWindow : public Parcel {
@@ -647,7 +650,7 @@ private:
         u64 layer_id = rp.Pop<u64>();
         u64 z_value = rp.Pop<u64>();
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
     }
 
@@ -655,7 +658,7 @@ private:
         IPC::RequestParser rp{ctx};
         u64 layer_id = rp.Pop<u64>();
         bool visibility = rp.Pop<bool>();
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
         LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:08X}, visibility={}", layer_id,
                     visibility);
@@ -762,7 +765,7 @@ private:
         IPC::RequestParser rp{ctx};
         u64 display = rp.Pop<u64>();
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
     }
 
@@ -776,7 +779,7 @@ private:
 
         u64 layer_id = nv_flinger->CreateLayer(display);
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(4, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
         rb.Push(layer_id);
     }
@@ -787,7 +790,7 @@ private:
         u32 stack = rp.Pop<u32>();
         u64 layer_id = rp.Pop<u64>();
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
     }
 
@@ -795,7 +798,7 @@ private:
         IPC::RequestParser rp{ctx};
         u64 layer_id = rp.Pop<u64>();
         bool visibility = rp.Pop<bool>();
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
         LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:X}, visibility={}", layer_id,
                     visibility);
@@ -852,7 +855,7 @@ private:
 
         ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet");
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(4, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u64>(nv_flinger->OpenDisplay(name));
     }
@@ -862,7 +865,7 @@ private:
         IPC::RequestParser rp{ctx};
         u64 display_id = rp.Pop<u64>();
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
     }
 
@@ -871,7 +874,7 @@ private:
         IPC::RequestParser rp{ctx};
         u64 display_id = rp.Pop<u64>();
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(6, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 6};
         rb.Push(RESULT_SUCCESS);
 
         if (Settings::values.use_docked_mode) {
@@ -889,7 +892,7 @@ private:
         u32 scaling_mode = rp.Pop<u32>();
         u64 unknown = rp.Pop<u64>();
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
     }
 
@@ -897,7 +900,7 @@ private:
         IPC::RequestParser rp{ctx};
         DisplayInfo display_info;
         ctx.WriteBuffer(&display_info, sizeof(DisplayInfo));
-        IPC::ResponseBuilder rb = rp.MakeBuilder(4, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u64>(1);
         LOG_WARNING(Service_VI, "(STUBBED) called");
@@ -918,7 +921,7 @@ private:
         u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id);
 
         NativeWindow native_window{buffer_queue_id};
-        IPC::ResponseBuilder rb = rp.MakeBuilder(4, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 4};
         rb.Push(RESULT_SUCCESS);
         rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
     }
@@ -937,7 +940,7 @@ private:
         u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id);
 
         NativeWindow native_window{buffer_queue_id};
-        IPC::ResponseBuilder rb = rp.MakeBuilder(6, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 6};
         rb.Push(RESULT_SUCCESS);
         rb.Push(layer_id);
         rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
@@ -949,7 +952,7 @@ private:
         IPC::RequestParser rp{ctx};
         u64 layer_id = rp.Pop<u64>();
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 0, 0);
+        IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(RESULT_SUCCESS);
     }
 
@@ -960,7 +963,7 @@ private:
 
         auto vsync_event = nv_flinger->GetVsyncEvent(display_id);
 
-        IPC::ResponseBuilder rb = rp.MakeBuilder(2, 1, 0);
+        IPC::ResponseBuilder rb{ctx, 2, 1};
         rb.Push(RESULT_SUCCESS);
         rb.PushCopyObjects(vsync_event);
     }
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h
index c2dc83605..e3963502a 100644
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,11 +4,10 @@
 
 #pragma once
 
-#include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/hle/service/service.h"
 
-namespace CoreTiming {
-struct EventType;
+namespace Service::NVFlinger {
+class NVFlinger;
 }
 
 namespace Service::VI {
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index 2b8f78136..c1824b9c3 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -14,11 +14,9 @@
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/resource_limit.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/loader/deconstructed_rom_directory.h"
 #include "core/loader/nso.h"
-#include "core/memory.h"
 
 namespace Loader {
 
@@ -88,8 +86,7 @@ FileType AppLoader_DeconstructedRomDirectory::IdentifyType(const FileSys::Virtua
     return FileType::Error;
 }
 
-ResultStatus AppLoader_DeconstructedRomDirectory::Load(
-    Kernel::SharedPtr<Kernel::Process>& process) {
+ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) {
     if (is_loaded) {
         return ResultStatus::ErrorAlreadyLoaded;
     }
@@ -127,12 +124,16 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(
     metadata.Print();
 
     const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()};
-    if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit) {
+    if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit ||
+        arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) {
         return ResultStatus::Error32BitISA;
     }
 
+    process.LoadFromMetadata(metadata);
+
     // Load NSO modules
-    VAddr next_load_addr{Memory::PROCESS_IMAGE_VADDR};
+    const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
+    VAddr next_load_addr = base_address;
     for (const auto& module : {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", "subsdk3",
                                "subsdk4", "subsdk5", "subsdk6", "subsdk7", "sdk"}) {
         const FileSys::VirtualFile module_file = dir->GetFile(module);
@@ -145,13 +146,7 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(
         }
     }
 
-    auto& kernel = Core::System::GetInstance().Kernel();
-    process->program_id = metadata.GetTitleID();
-    process->svc_access_mask.set();
-    process->resource_limit =
-        kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
-    process->Run(Memory::PROCESS_IMAGE_VADDR, metadata.GetMainThreadPriority(),
-                 metadata.GetMainThreadStackSize());
+    process.Run(base_address, metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize());
 
     // Find the RomFS by searching for a ".romfs" file in this directory
     const auto& files = dir->GetFiles();
diff --git a/src/core/loader/deconstructed_rom_directory.h b/src/core/loader/deconstructed_rom_directory.h
index 8a0dc1b1e..d109ed2b5 100644
--- a/src/core/loader/deconstructed_rom_directory.h
+++ b/src/core/loader/deconstructed_rom_directory.h
@@ -7,7 +7,6 @@
 #include <string>
 #include "common/common_types.h"
 #include "core/file_sys/program_metadata.h"
-#include "core/hle/kernel/object.h"
 #include "core/loader/loader.h"
 
 namespace Loader {
@@ -38,7 +37,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+    ResultStatus Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
     ResultStatus ReadIcon(std::vector<u8>& buffer) override;
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 120e1e133..e67b49fc9 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -12,7 +12,7 @@
 #include "core/core.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/resource_limit.h"
+#include "core/hle/kernel/vm_manager.h"
 #include "core/loader/elf.h"
 #include "core/memory.h"
 
@@ -189,7 +189,7 @@ private:
 
     u32* sectionAddrs;
     bool relocate;
-    u32 entryPoint;
+    VAddr entryPoint;
 
 public:
     explicit ElfReader(void* ptr);
@@ -205,13 +205,13 @@ public:
     ElfMachine GetMachine() const {
         return (ElfMachine)(header->e_machine);
     }
-    u32 GetEntryPoint() const {
+    VAddr GetEntryPoint() const {
         return entryPoint;
     }
     u32 GetFlags() const {
         return (u32)(header->e_flags);
     }
-    SharedPtr<CodeSet> LoadInto(u32 vaddr);
+    SharedPtr<CodeSet> LoadInto(VAddr vaddr);
 
     int GetNumSegments() const {
         return (int)(header->e_phnum);
@@ -274,7 +274,7 @@ const char* ElfReader::GetSectionName(int section) const {
     return nullptr;
 }
 
-SharedPtr<CodeSet> ElfReader::LoadInto(u32 vaddr) {
+SharedPtr<CodeSet> ElfReader::LoadInto(VAddr vaddr) {
     LOG_DEBUG(Loader, "String section: {}", header->e_shstrndx);
 
     // Should we relocate?
@@ -289,24 +289,24 @@ SharedPtr<CodeSet> ElfReader::LoadInto(u32 vaddr) {
     LOG_DEBUG(Loader, "{} segments:", header->e_phnum);
 
     // First pass : Get the bits into RAM
-    u32 base_addr = relocate ? vaddr : 0;
+    const VAddr base_addr = relocate ? vaddr : 0;
 
-    u32 total_image_size = 0;
+    u64 total_image_size = 0;
     for (unsigned int i = 0; i < header->e_phnum; ++i) {
-        Elf32_Phdr* p = &segments[i];
+        const Elf32_Phdr* p = &segments[i];
         if (p->p_type == PT_LOAD) {
             total_image_size += (p->p_memsz + 0xFFF) & ~0xFFF;
         }
     }
 
     std::vector<u8> program_image(total_image_size);
-    size_t current_image_position = 0;
+    std::size_t current_image_position = 0;
 
     auto& kernel = Core::System::GetInstance().Kernel();
     SharedPtr<CodeSet> codeset = CodeSet::Create(kernel, "");
 
     for (unsigned int i = 0; i < header->e_phnum; ++i) {
-        Elf32_Phdr* p = &segments[i];
+        const Elf32_Phdr* p = &segments[i];
         LOG_DEBUG(Loader, "Type: {} Vaddr: {:08X} Filesz: {:08X} Memsz: {:08X} ", p->p_type,
                   p->p_vaddr, p->p_filesz, p->p_memsz);
 
@@ -333,8 +333,8 @@ SharedPtr<CodeSet> ElfReader::LoadInto(u32 vaddr) {
                 continue;
             }
 
-            u32 segment_addr = base_addr + p->p_vaddr;
-            u32 aligned_size = (p->p_memsz + 0xFFF) & ~0xFFF;
+            const VAddr segment_addr = base_addr + p->p_vaddr;
+            const u32 aligned_size = (p->p_memsz + 0xFFF) & ~0xFFF;
 
             codeset_segment->offset = current_image_position;
             codeset_segment->addr = segment_addr;
@@ -387,7 +387,7 @@ FileType AppLoader_ELF::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::Error;
 }
 
-ResultStatus AppLoader_ELF::Load(Kernel::SharedPtr<Kernel::Process>& process) {
+ResultStatus AppLoader_ELF::Load(Kernel::Process& process) {
     if (is_loaded)
         return ResultStatus::ErrorAlreadyLoaded;
 
@@ -395,19 +395,13 @@ ResultStatus AppLoader_ELF::Load(Kernel::SharedPtr<Kernel::Process>& process) {
     if (buffer.size() != file->GetSize())
         return ResultStatus::ErrorIncorrectELFFileSize;
 
+    const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
     ElfReader elf_reader(&buffer[0]);
-    SharedPtr<CodeSet> codeset = elf_reader.LoadInto(Memory::PROCESS_IMAGE_VADDR);
+    SharedPtr<CodeSet> codeset = elf_reader.LoadInto(base_address);
     codeset->name = file->GetName();
 
-    process->LoadModule(codeset, codeset->entrypoint);
-    process->svc_access_mask.set();
-
-    // Attach the default resource limit (APPLICATION) to the process
-    auto& kernel = Core::System::GetInstance().Kernel();
-    process->resource_limit =
-        kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
-
-    process->Run(codeset->entrypoint, 48, Memory::DEFAULT_STACK_SIZE);
+    process.LoadModule(codeset, codeset->entrypoint);
+    process.Run(codeset->entrypoint, 48, Memory::DEFAULT_STACK_SIZE);
 
     is_loaded = true;
     return ResultStatus::Success;
diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h
index b8fb982d0..6af76441c 100644
--- a/src/core/loader/elf.h
+++ b/src/core/loader/elf.h
@@ -8,9 +8,6 @@
 #include "common/common_types.h"
 #include "core/loader/loader.h"
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Loader namespace
-
 namespace Loader {
 
 /// Loads an ELF/AXF file
@@ -29,7 +26,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+    ResultStatus Load(Kernel::Process& process) override;
 };
 
 } // namespace Loader
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index fa43a2650..f2a183ba1 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -155,7 +155,7 @@ constexpr std::array<const char*, 58> RESULT_MESSAGES{
 };
 
 std::ostream& operator<<(std::ostream& os, ResultStatus status) {
-    os << RESULT_MESSAGES.at(static_cast<size_t>(status));
+    os << RESULT_MESSAGES.at(static_cast<std::size_t>(status));
     return os;
 }
 
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index 843c4bb91..20e66109b 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -12,7 +12,6 @@
 #include <boost/optional.hpp>
 #include "common/common_types.h"
 #include "core/file_sys/vfs.h"
-#include "core/hle/kernel/object.h"
 
 namespace Kernel {
 struct AddressMapping;
@@ -136,7 +135,7 @@ public:
      * @param process The newly created process.
      * @return The status result of the operation.
      */
-    virtual ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) = 0;
+    virtual ResultStatus Load(Kernel::Process& process) = 0;
 
     /**
      * Loads the system mode that this application needs.
diff --git a/src/core/loader/nax.cpp b/src/core/loader/nax.cpp
index b46d81c02..073fb9d2f 100644
--- a/src/core/loader/nax.cpp
+++ b/src/core/loader/nax.cpp
@@ -11,6 +11,20 @@
 #include "core/loader/nca.h"
 
 namespace Loader {
+namespace {
+FileType IdentifyTypeImpl(const FileSys::NAX& nax) {
+    if (nax.GetStatus() != ResultStatus::Success) {
+        return FileType::Error;
+    }
+    // AsNCA() is only consulted once the NAX itself reported success; a null NCA is an error.
+    const auto nca = nax.AsNCA();
+    if (nca == nullptr || nca->GetStatus() != ResultStatus::Success) {
+        return FileType::Error;
+    }
+    // Both the NAX and the NCA obtained from it reported success.
+    return FileType::NAX;
+}
+} // Anonymous namespace
 
 AppLoader_NAX::AppLoader_NAX(FileSys::VirtualFile file)
     : AppLoader(file), nax(std::make_unique<FileSys::NAX>(file)),
@@ -19,17 +33,15 @@ AppLoader_NAX::AppLoader_NAX(FileSys::VirtualFile file)
 AppLoader_NAX::~AppLoader_NAX() = default;
 
 FileType AppLoader_NAX::IdentifyType(const FileSys::VirtualFile& file) {
-    FileSys::NAX nax(file);
-
-    if (nax.GetStatus() == ResultStatus::Success && nax.AsNCA() != nullptr &&
-        nax.AsNCA()->GetStatus() == ResultStatus::Success) {
-        return FileType::NAX;
-    }
+    const FileSys::NAX nax(file);
+    return IdentifyTypeImpl(nax);
+}
 
-    return FileType::Error;
+FileType AppLoader_NAX::GetFileType() {
+    return IdentifyTypeImpl(*nax);
 }
 
-ResultStatus AppLoader_NAX::Load(Kernel::SharedPtr<Kernel::Process>& process) {
+ResultStatus AppLoader_NAX::Load(Kernel::Process& process) {
     if (is_loaded) {
         return ResultStatus::ErrorAlreadyLoaded;
     }
diff --git a/src/core/loader/nax.h b/src/core/loader/nax.h
index 4dbae2918..fc3c01876 100644
--- a/src/core/loader/nax.h
+++ b/src/core/loader/nax.h
@@ -31,11 +31,9 @@ public:
      */
     static FileType IdentifyType(const FileSys::VirtualFile& file);
 
-    FileType GetFileType() override {
-        return IdentifyType(file);
-    }
+    FileType GetFileType() override;
 
-    ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+    ResultStatus Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
     ResultStatus ReadProgramId(u64& out_program_id) override;
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp
index 6aaffae59..7e1b0d84f 100644
--- a/src/core/loader/nca.cpp
+++ b/src/core/loader/nca.cpp
@@ -30,7 +30,7 @@ FileType AppLoader_NCA::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::Error;
 }
 
-ResultStatus AppLoader_NCA::Load(Kernel::SharedPtr<Kernel::Process>& process) {
+ResultStatus AppLoader_NCA::Load(Kernel::Process& process) {
     if (is_loaded) {
         return ResultStatus::ErrorAlreadyLoaded;
     }
diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h
index 10be197c4..95d9b73a1 100644
--- a/src/core/loader/nca.h
+++ b/src/core/loader/nca.h
@@ -6,7 +6,6 @@
 
 #include "common/common_types.h"
 #include "core/file_sys/vfs.h"
-#include "core/hle/kernel/object.h"
 #include "core/loader/loader.h"
 
 namespace FileSys {
@@ -34,7 +33,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+    ResultStatus Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
     u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index c49ec34ab..c10f826a4 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -16,7 +16,7 @@
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/resource_limit.h"
+#include "core/hle/kernel/vm_manager.h"
 #include "core/loader/nro.h"
 #include "core/memory.h"
 
@@ -175,23 +175,19 @@ bool AppLoader_NRO::LoadNro(FileSys::VirtualFile file, VAddr load_base) {
     return true;
 }
 
-ResultStatus AppLoader_NRO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
+ResultStatus AppLoader_NRO::Load(Kernel::Process& process) {
     if (is_loaded) {
         return ResultStatus::ErrorAlreadyLoaded;
     }
 
     // Load NRO
-    static constexpr VAddr base_addr{Memory::PROCESS_IMAGE_VADDR};
+    const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
 
-    if (!LoadNro(file, base_addr)) {
+    if (!LoadNro(file, base_address)) {
         return ResultStatus::ErrorLoadingNRO;
     }
 
-    auto& kernel = Core::System::GetInstance().Kernel();
-    process->svc_access_mask.set();
-    process->resource_limit =
-        kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
-    process->Run(base_addr, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
+    process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
 
     is_loaded = true;
     return ResultStatus::Success;
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 96d2de305..04b46119a 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -6,7 +6,6 @@
 
 #include <string>
 #include "common/common_types.h"
-#include "core/hle/kernel/object.h"
 #include "core/loader/linker.h"
 #include "core/loader/loader.h"
 
@@ -33,7 +32,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+    ResultStatus Load(Kernel::Process& process) override;
 
     ResultStatus ReadIcon(std::vector<u8>& buffer) override;
     ResultStatus ReadProgramId(u64& out_program_id) override;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 3c6306818..cbe2a3e53 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -13,7 +13,7 @@
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
-#include "core/hle/kernel/resource_limit.h"
+#include "core/hle/kernel/vm_manager.h"
 #include "core/loader/nso.h"
 #include "core/memory.h"
 
@@ -32,11 +32,18 @@ static_assert(sizeof(NsoSegmentHeader) == 0x10, "NsoSegmentHeader has incorrect
 
 struct NsoHeader {
     u32_le magic;
-    INSERT_PADDING_BYTES(0xc);
+    u32_le version;
+    INSERT_PADDING_WORDS(1);
+    u8 flags;
     std::array<NsoSegmentHeader, 3> segments; // Text, RoData, Data (in that order)
     u32_le bss_size;
     INSERT_PADDING_BYTES(0x1c);
     std::array<u32_le, 3> segments_compressed_size;
+
+    bool IsSegmentCompressed(std::size_t segment_num) const {
+        ASSERT_MSG(segment_num < segments.size(), "Invalid segment {}", segment_num);
+        return ((flags >> segment_num) & 1) != 0; // Bit N of flags marks segment N compressed.
+    }
 };
 static_assert(sizeof(NsoHeader) == 0x6c, "NsoHeader has incorrect size.");
 static_assert(std::is_trivially_copyable_v<NsoHeader>, "NsoHeader isn't trivially copyable.");
@@ -105,9 +112,11 @@ VAddr AppLoader_NSO::LoadModule(FileSys::VirtualFile file, VAddr load_base) {
     Kernel::SharedPtr<Kernel::CodeSet> codeset = Kernel::CodeSet::Create(kernel, "");
     std::vector<u8> program_image;
     for (std::size_t i = 0; i < nso_header.segments.size(); ++i) {
-        const std::vector<u8> compressed_data =
+        std::vector<u8> data =
             file->ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset);
-        std::vector<u8> data = DecompressSegment(compressed_data, nso_header.segments[i]);
+        if (nso_header.IsSegmentCompressed(i)) {
+            data = DecompressSegment(data, nso_header.segments[i]);
+        }
         program_image.resize(nso_header.segments[i].location);
         program_image.insert(program_image.end(), data.begin(), data.end());
         codeset->segments[i].addr = nso_header.segments[i].location;
@@ -144,21 +153,17 @@ VAddr AppLoader_NSO::LoadModule(FileSys::VirtualFile file, VAddr load_base) {
     return load_base + image_size;
 }
 
-ResultStatus AppLoader_NSO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
+ResultStatus AppLoader_NSO::Load(Kernel::Process& process) {
     if (is_loaded) {
         return ResultStatus::ErrorAlreadyLoaded;
     }
 
     // Load module
-    LoadModule(file, Memory::PROCESS_IMAGE_VADDR);
-    LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), Memory::PROCESS_IMAGE_VADDR);
+    const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
+    LoadModule(file, base_address);
+    LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), base_address);
 
-    auto& kernel = Core::System::GetInstance().Kernel();
-    process->svc_access_mask.set();
-    process->resource_limit =
-        kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
-    process->Run(Memory::PROCESS_IMAGE_VADDR, Kernel::THREADPRIO_DEFAULT,
-                 Memory::DEFAULT_STACK_SIZE);
+    process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
 
     is_loaded = true;
     return ResultStatus::Success;
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index aaeb1f2a9..7f142405b 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -4,9 +4,7 @@
 
 #pragma once
 
-#include <string>
 #include "common/common_types.h"
-#include "core/hle/kernel/object.h"
 #include "core/loader/linker.h"
 #include "core/loader/loader.h"
 
@@ -30,7 +28,7 @@ public:
 
     static VAddr LoadModule(FileSys::VirtualFile file, VAddr load_base);
 
-    ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+    ResultStatus Load(Kernel::Process& process) override;
 };
 
 } // namespace Loader
diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp
index 291a9876d..b7ba77ef4 100644
--- a/src/core/loader/nsp.cpp
+++ b/src/core/loader/nsp.cpp
@@ -10,8 +10,6 @@
 #include "core/file_sys/control_metadata.h"
 #include "core/file_sys/nca_metadata.h"
 #include "core/file_sys/patch_manager.h"
-#include "core/file_sys/registered_cache.h"
-#include "core/file_sys/romfs.h"
 #include "core/file_sys/submission_package.h"
 #include "core/hle/kernel/process.h"
 #include "core/loader/deconstructed_rom_directory.h"
@@ -62,7 +60,7 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::Error;
 }
 
-ResultStatus AppLoader_NSP::Load(Kernel::SharedPtr<Kernel::Process>& process) {
+ResultStatus AppLoader_NSP::Load(Kernel::Process& process) {
     if (is_loaded) {
         return ResultStatus::ErrorAlreadyLoaded;
     }
diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h
index 7ef810499..eac9b819a 100644
--- a/src/core/loader/nsp.h
+++ b/src/core/loader/nsp.h
@@ -35,7 +35,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+    ResultStatus Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
     ResultStatus ReadProgramId(u64& out_program_id) override;
diff --git a/src/core/loader/xci.cpp b/src/core/loader/xci.cpp
index 16509229f..eda67a8c8 100644
--- a/src/core/loader/xci.cpp
+++ b/src/core/loader/xci.cpp
@@ -9,8 +9,6 @@
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
 #include "core/file_sys/patch_manager.h"
-#include "core/file_sys/romfs.h"
-#include "core/file_sys/submission_package.h"
 #include "core/hle/kernel/process.h"
 #include "core/loader/nca.h"
 #include "core/loader/xci.h"
@@ -46,7 +44,7 @@ FileType AppLoader_XCI::IdentifyType(const FileSys::VirtualFile& file) {
     return FileType::Error;
 }
 
-ResultStatus AppLoader_XCI::Load(Kernel::SharedPtr<Kernel::Process>& process) {
+ResultStatus AppLoader_XCI::Load(Kernel::Process& process) {
     if (is_loaded) {
         return ResultStatus::ErrorAlreadyLoaded;
     }
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h
index cc4287e17..17e47b658 100644
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -35,7 +35,7 @@ public:
         return IdentifyType(file);
     }
 
-    ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+    ResultStatus Load(Kernel::Process& process) override;
 
     ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
     ResultStatus ReadProgramId(u64& out_program_id) override;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 0e4e0157c..014298ed6 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -3,7 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
-#include <array>
 #include <cstring>
 #include <utility>
 
@@ -15,11 +14,11 @@
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/vm_manager.h"
 #include "core/hle/lock.h"
 #include "core/memory.h"
 #include "core/memory_setup.h"
 #include "video_core/renderer_base.h"
-#include "video_core/video_core.h"
 
 namespace Memory {
 
@@ -41,6 +40,21 @@ PageTable* GetCurrentPageTable() {
     return current_page_table;
 }
 
+PageTable::PageTable() = default;
+
+PageTable::PageTable(std::size_t address_space_width_in_bits) {
+    Resize(address_space_width_in_bits);
+}
+
+PageTable::~PageTable() = default;
+
+void PageTable::Resize(std::size_t address_space_width_in_bits) {
+    const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
+    // NOTE(review): assumes PAGE_BITS <= width < PAGE_BITS + 64 (unsigned subtract, 64-bit shift).
+    pointers.resize(num_page_table_entries);
+    attributes.resize(num_page_table_entries);
+}
+
 static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
     LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
               (base + size) * PAGE_SIZE);
@@ -50,7 +64,7 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
 
     VAddr end = base + size;
     while (base != end) {
-        ASSERT_MSG(base < PAGE_TABLE_NUM_ENTRIES, "out of range mapping at {:016X}", base);
+        ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
 
         page_table.attributes[base] = type;
         page_table.pointers[base] = memory;
@@ -105,7 +119,7 @@ void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPoin
 static u8* GetPointerFromVMA(const Kernel::Process& process, VAddr vaddr) {
     u8* direct_pointer = nullptr;
 
-    auto& vm_manager = process.vm_manager;
+    auto& vm_manager = process.VMManager();
 
     auto it = vm_manager.FindVMA(vaddr);
     ASSERT(it != vm_manager.vma_map.end());
@@ -200,7 +214,7 @@ void Write(const VAddr vaddr, const T data) {
 }
 
 bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
-    auto& page_table = process.vm_manager.page_table;
+    const auto& page_table = process.VMManager().page_table;
 
     const u8* page_pointer = page_table.pointers[vaddr >> PAGE_BITS];
     if (page_pointer)
@@ -323,7 +337,7 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
         return;
     }
 
-    VAddr end = start + size;
+    const VAddr end = start + size;
 
     const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
         if (start >= region_end || end <= region_start) {
@@ -333,7 +347,7 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
 
         const VAddr overlap_start = std::max(start, region_start);
         const VAddr overlap_end = std::min(end, region_end);
-        const u64 overlap_size = overlap_end - overlap_start;
+        const VAddr overlap_size = overlap_end - overlap_start;
 
         auto& rasterizer = system_instance.Renderer().Rasterizer();
         switch (mode) {
@@ -349,8 +363,10 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
         }
     };
 
-    CheckRegion(PROCESS_IMAGE_VADDR, PROCESS_IMAGE_VADDR_END);
-    CheckRegion(HEAP_VADDR, HEAP_VADDR_END);
+    const auto& vm_manager = Core::CurrentProcess()->VMManager();
+
+    CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
+    CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
 }
 
 u8 Read8(const VAddr addr) {
@@ -370,16 +386,16 @@ u64 Read64(const VAddr addr) {
 }
 
 void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
-               const size_t size) {
-    auto& page_table = process.vm_manager.page_table;
+               const std::size_t size) {
+    const auto& page_table = process.VMManager().page_table;
 
-    size_t remaining_size = size;
-    size_t page_index = src_addr >> PAGE_BITS;
-    size_t page_offset = src_addr & PAGE_MASK;
+    std::size_t remaining_size = size;
+    std::size_t page_index = src_addr >> PAGE_BITS;
+    std::size_t page_offset = src_addr & PAGE_MASK;
 
     while (remaining_size > 0) {
-        const size_t copy_amount =
-            std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size);
+        const std::size_t copy_amount =
+            std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
@@ -414,7 +430,7 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
     }
 }
 
-void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
+void ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
     ReadBlock(*Core::CurrentProcess(), src_addr, dest_buffer, size);
 }
 
@@ -435,15 +451,15 @@ void Write64(const VAddr addr, const u64 data) {
 }
 
 void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
-                const size_t size) {
-    auto& page_table = process.vm_manager.page_table;
-    size_t remaining_size = size;
-    size_t page_index = dest_addr >> PAGE_BITS;
-    size_t page_offset = dest_addr & PAGE_MASK;
+                const std::size_t size) {
+    const auto& page_table = process.VMManager().page_table;
+    std::size_t remaining_size = size;
+    std::size_t page_index = dest_addr >> PAGE_BITS;
+    std::size_t page_offset = dest_addr & PAGE_MASK;
 
     while (remaining_size > 0) {
-        const size_t copy_amount =
-            std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size);
+        const std::size_t copy_amount =
+            std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
@@ -477,19 +493,19 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
     }
 }
 
-void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size) {
+void WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
     WriteBlock(*Core::CurrentProcess(), dest_addr, src_buffer, size);
 }
 
-void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size_t size) {
-    auto& page_table = process.vm_manager.page_table;
-    size_t remaining_size = size;
-    size_t page_index = dest_addr >> PAGE_BITS;
-    size_t page_offset = dest_addr & PAGE_MASK;
+void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
+    const auto& page_table = process.VMManager().page_table;
+    std::size_t remaining_size = size;
+    std::size_t page_index = dest_addr >> PAGE_BITS;
+    std::size_t page_offset = dest_addr & PAGE_MASK;
 
     while (remaining_size > 0) {
-        const size_t copy_amount =
-            std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size);
+        const std::size_t copy_amount =
+            std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
@@ -522,15 +538,16 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size
     }
 }
 
-void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr, const size_t size) {
-    auto& page_table = process.vm_manager.page_table;
-    size_t remaining_size = size;
-    size_t page_index = src_addr >> PAGE_BITS;
-    size_t page_offset = src_addr & PAGE_MASK;
+void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
+               const std::size_t size) {
+    const auto& page_table = process.VMManager().page_table;
+    std::size_t remaining_size = size;
+    std::size_t page_index = src_addr >> PAGE_BITS;
+    std::size_t page_offset = src_addr & PAGE_MASK;
 
     while (remaining_size > 0) {
-        const size_t copy_amount =
-            std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size);
+        const std::size_t copy_amount =
+            std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
@@ -565,7 +582,7 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
     }
 }
 
-void CopyBlock(VAddr dest_addr, VAddr src_addr, size_t size) {
+void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) {
     CopyBlock(*Core::CurrentProcess(), dest_addr, src_addr, size);
 }
 
diff --git a/src/core/memory.h b/src/core/memory.h
index f06e04a75..1acf5ce8c 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -4,10 +4,10 @@
 
 #pragma once
 
-#include <array>
 #include <cstddef>
 #include <string>
 #include <tuple>
+#include <vector>
 #include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
 #include "core/memory_hook.h"
@@ -22,11 +22,9 @@ namespace Memory {
  * Page size used by the ARM architecture. This is the smallest granularity with which memory can
  * be mapped.
  */
-constexpr size_t PAGE_BITS = 12;
-constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
+constexpr std::size_t PAGE_BITS = 12;
+constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
 constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
-constexpr size_t ADDRESS_SPACE_BITS = 36;
-constexpr size_t PAGE_TABLE_NUM_ENTRIES = 1ULL << (ADDRESS_SPACE_BITS - PAGE_BITS);
 
 enum class PageType : u8 {
     /// Page is unmapped and should cause an access error.
@@ -62,32 +60,39 @@ struct SpecialRegion {
  * mimics the way a real CPU page table works.
  */
 struct PageTable {
+    explicit PageTable();
+    explicit PageTable(std::size_t address_space_width_in_bits);
+    ~PageTable();
+
+    /**
+     * Resizes the page table to be able to accommodate enough pages within
+     * a given address space.
+     *
+     * @param address_space_width_in_bits The address size width in bits.
+     */
+    void Resize(std::size_t address_space_width_in_bits);
+
     /**
-     * Array of memory pointers backing each page. An entry can only be non-null if the
-     * corresponding entry in the `attributes` array is of type `Memory`.
+     * Vector of memory pointers backing each page. An entry can only be non-null if the
+     * corresponding entry in the `attributes` vector is of type `Memory`.
      */
-    std::array<u8*, PAGE_TABLE_NUM_ENTRIES> pointers;
+    std::vector<u8*> pointers;
 
     /**
-     * Contains MMIO handlers that back memory regions whose entries in the `attribute` array is of
-     * type `Special`.
+     * Contains MMIO handlers that back memory regions whose entries in the `attributes` vector are
+     * of type `Special`.
      */
     boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
 
     /**
-     * Array of fine grained page attributes. If it is set to any value other than `Memory`, then
+     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
      * the corresponding entry in `pointers` MUST be set to null.
      */
-    std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes;
+    std::vector<PageType> attributes;
 };
 
 /// Virtual user-space memory regions
 enum : VAddr {
-    /// Where the application text, data and bss reside.
-    PROCESS_IMAGE_VADDR = 0x08000000,
-    PROCESS_IMAGE_MAX_SIZE = 0x08000000,
-    PROCESS_IMAGE_VADDR_END = PROCESS_IMAGE_VADDR + PROCESS_IMAGE_MAX_SIZE,
-
     /// Read-only page containing kernel and system configuration values.
     CONFIG_MEMORY_VADDR = 0x1FF80000,
     CONFIG_MEMORY_SIZE = 0x00001000,
@@ -98,36 +103,12 @@ enum : VAddr {
     SHARED_PAGE_SIZE = 0x00001000,
     SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
 
-    /// Area where TLS (Thread-Local Storage) buffers are allocated.
-    TLS_AREA_VADDR = 0x40000000,
+    /// TLS (Thread-Local Storage) related.
     TLS_ENTRY_SIZE = 0x200,
-    TLS_AREA_SIZE = 0x10000000,
-    TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
 
     /// Application stack
-    STACK_AREA_VADDR = TLS_AREA_VADDR_END,
-    STACK_AREA_SIZE = 0x10000000,
-    STACK_AREA_VADDR_END = STACK_AREA_VADDR + STACK_AREA_SIZE,
     DEFAULT_STACK_SIZE = 0x100000,
 
-    /// Application heap
-    /// Size is confirmed to be a static value on fw 3.0.0
-    HEAP_VADDR = 0x108000000,
-    HEAP_SIZE = 0x180000000,
-    HEAP_VADDR_END = HEAP_VADDR + HEAP_SIZE,
-
-    /// New map region
-    /// Size is confirmed to be a static value on fw 3.0.0
-    NEW_MAP_REGION_VADDR = HEAP_VADDR_END,
-    NEW_MAP_REGION_SIZE = 0x80000000,
-    NEW_MAP_REGION_VADDR_END = NEW_MAP_REGION_VADDR + NEW_MAP_REGION_SIZE,
-
-    /// Map region
-    /// Size is confirmed to be a static value on fw 3.0.0
-    MAP_REGION_VADDR = NEW_MAP_REGION_VADDR_END,
-    MAP_REGION_SIZE = 0x1000000000,
-    MAP_REGION_VADDR_END = MAP_REGION_VADDR + MAP_REGION_SIZE,
-
     /// Kernel Virtual Address Range
     KERNEL_REGION_VADDR = 0xFFFFFF8000000000,
     KERNEL_REGION_SIZE = 0x7FFFE00000,
@@ -154,13 +135,13 @@ void Write16(VAddr addr, u16 data);
 void Write32(VAddr addr, u32 data);
 void Write64(VAddr addr, u64 data);
 
-void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer, size_t size);
-void ReadBlock(VAddr src_addr, void* dest_buffer, size_t size);
+void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer, std::size_t size);
+void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
 void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
-                size_t size);
-void WriteBlock(VAddr dest_addr, const void* src_buffer, size_t size);
-void ZeroBlock(const Kernel::Process& process, VAddr dest_addr, size_t size);
-void CopyBlock(VAddr dest_addr, VAddr src_addr, size_t size);
+                std::size_t size);
+void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
+void ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size);
+void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
 
 u8* GetPointer(VAddr vaddr);
 
diff --git a/src/core/memory_hook.h b/src/core/memory_hook.h
index e8ea19333..0269c7ff1 100644
--- a/src/core/memory_hook.h
+++ b/src/core/memory_hook.h
@@ -32,14 +32,14 @@ public:
     virtual boost::optional<u32> Read32(VAddr addr) = 0;
     virtual boost::optional<u64> Read64(VAddr addr) = 0;
 
-    virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, size_t size) = 0;
+    virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) = 0;
 
     virtual bool Write8(VAddr addr, u8 data) = 0;
     virtual bool Write16(VAddr addr, u16 data) = 0;
     virtual bool Write32(VAddr addr, u32 data) = 0;
     virtual bool Write64(VAddr addr, u64 data) = 0;
 
-    virtual bool WriteBlock(VAddr dest_addr, const void* src_buffer, size_t size) = 0;
+    virtual bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) = 0;
 };
 
 using MemoryHookPointer = std::shared_ptr<MemoryHook>;
diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp
index af032f0c9..73cacb47f 100644
--- a/src/core/tracer/recorder.cpp
+++ b/src/core/tracer/recorder.cpp
@@ -76,7 +76,7 @@ void Recorder::Finish(const std::string& filename) {
     try {
         // Open file and write header
         FileUtil::IOFile file(filename, "wb");
-        size_t written = file.WriteObject(header);
+        std::size_t written = file.WriteObject(header);
         if (written != 1 || file.Tell() != initial.gpu_registers)
             throw "Failed to write header";
 
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 4e75a72ec..37f09ce5f 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -4,14 +4,12 @@ add_executable(tests
     core/arm/arm_test_common.cpp
     core/arm/arm_test_common.h
     core/core_timing.cpp
-    glad.cpp
     tests.cpp
 )
 
 create_target_directory_groups(tests)
 
 target_link_libraries(tests PRIVATE common core)
-target_link_libraries(tests PRIVATE glad) # To support linker work-around
 target_link_libraries(tests PRIVATE ${PLATFORM_LIBRARIES} catch-single-include Threads::Threads)
 
 add_test(NAME tests COMMAND tests)
diff --git a/src/tests/common/ring_buffer.cpp b/src/tests/common/ring_buffer.cpp
index f3fe57839..c883c4d56 100644
--- a/src/tests/common/ring_buffer.cpp
+++ b/src/tests/common/ring_buffer.cpp
@@ -17,9 +17,9 @@ TEST_CASE("RingBuffer: Basic Tests", "[common]") {
     RingBuffer<char, 4, 1> buf;
 
     // Pushing values into a ring buffer with space should succeed.
-    for (size_t i = 0; i < 4; i++) {
+    for (std::size_t i = 0; i < 4; i++) {
         const char elem = static_cast<char>(i);
-        const size_t count = buf.Push(&elem, 1);
+        const std::size_t count = buf.Push(&elem, 1);
         REQUIRE(count == 1);
     }
 
@@ -28,7 +28,7 @@ TEST_CASE("RingBuffer: Basic Tests", "[common]") {
     // Pushing values into a full ring buffer should fail.
     {
         const char elem = static_cast<char>(42);
-        const size_t count = buf.Push(&elem, 1);
+        const std::size_t count = buf.Push(&elem, 1);
         REQUIRE(count == 0);
     }
 
@@ -57,7 +57,7 @@ TEST_CASE("RingBuffer: Basic Tests", "[common]") {
     {
         std::vector<char> to_push(6);
         std::iota(to_push.begin(), to_push.end(), 88);
-        const size_t count = buf.Push(to_push);
+        const std::size_t count = buf.Push(to_push);
         REQUIRE(count == 3);
     }
 
@@ -79,9 +79,9 @@ TEST_CASE("RingBuffer: Basic Tests", "[common]") {
 TEST_CASE("RingBuffer: Threaded Test", "[common]") {
     RingBuffer<char, 4, 2> buf;
     const char seed = 42;
-    const size_t count = 1000000;
-    size_t full = 0;
-    size_t empty = 0;
+    const std::size_t count = 1000000;
+    std::size_t full = 0;
+    std::size_t empty = 0;
 
     const auto next_value = [](std::array<char, 2>& value) {
         value[0] += 1;
@@ -90,9 +90,9 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") {
 
     std::thread producer{[&] {
         std::array<char, 2> value = {seed, seed};
-        size_t i = 0;
+        std::size_t i = 0;
         while (i < count) {
-            if (const size_t c = buf.Push(&value[0], 1); c > 0) {
+            if (const std::size_t c = buf.Push(&value[0], 1); c > 0) {
                 REQUIRE(c == 1);
                 i++;
                 next_value(value);
@@ -105,7 +105,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") {
 
     std::thread consumer{[&] {
         std::array<char, 2> value = {seed, seed};
-        size_t i = 0;
+        std::size_t i = 0;
         while (i < count) {
             if (const std::vector<char> v = buf.Pop(1); v.size() > 0) {
                 REQUIRE(v.size() == 2);
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 038d57b3a..c0a57e71f 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
@@ -14,11 +16,12 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
     : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
 
     Core::CurrentProcess() = Kernel::Process::Create(kernel, "");
-    page_table = &Core::CurrentProcess()->vm_manager.page_table;
+    page_table = &Core::CurrentProcess()->VMManager().page_table;
 
-    page_table->pointers.fill(nullptr);
+    std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
     page_table->special_regions.clear();
-    page_table->attributes.fill(Memory::PageType::Unmapped);
+    std::fill(page_table->attributes.begin(), page_table->attributes.end(),
+              Memory::PageType::Unmapped);
 
     Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
     Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
@@ -87,11 +90,11 @@ boost::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) {
     return *Read32(addr) | static_cast<u64>(*Read32(addr + 4)) << 32;
 }
 
-bool TestEnvironment::TestMemory::ReadBlock(VAddr src_addr, void* dest_buffer, size_t size) {
+bool TestEnvironment::TestMemory::ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) {
     VAddr addr = src_addr;
     u8* data = static_cast<u8*>(dest_buffer);
 
-    for (size_t i = 0; i < size; i++, addr++, data++) {
+    for (std::size_t i = 0; i < size; i++, addr++, data++) {
         *data = *Read8(addr);
     }
 
@@ -126,11 +129,12 @@ bool TestEnvironment::TestMemory::Write64(VAddr addr, u64 data) {
     return true;
 }
 
-bool TestEnvironment::TestMemory::WriteBlock(VAddr dest_addr, const void* src_buffer, size_t size) {
+bool TestEnvironment::TestMemory::WriteBlock(VAddr dest_addr, const void* src_buffer,
+                                             std::size_t size) {
     VAddr addr = dest_addr;
     const u8* data = static_cast<const u8*>(src_buffer);
 
-    for (size_t i = 0; i < size; i++, addr++, data++) {
+    for (std::size_t i = 0; i < size; i++, addr++, data++) {
         env->write_records.emplace_back(8, addr, *data);
         if (env->mutable_memory)
             env->SetMemory8(addr, *data);
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index e4b6df194..5de8dab4e 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -19,8 +19,8 @@ struct PageTable;
 namespace ArmTests {
 
 struct WriteRecord {
-    WriteRecord(size_t size, VAddr addr, u64 data) : size(size), addr(addr), data(data) {}
-    size_t size;
+    WriteRecord(std::size_t size, VAddr addr, u64 data) : size(size), addr(addr), data(data) {}
+    std::size_t size;
     VAddr addr;
     u64 data;
     bool operator==(const WriteRecord& o) const {
@@ -71,14 +71,14 @@ private:
         boost::optional<u32> Read32(VAddr addr) override;
         boost::optional<u64> Read64(VAddr addr) override;
 
-        bool ReadBlock(VAddr src_addr, void* dest_buffer, size_t size) override;
+        bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) override;
 
         bool Write8(VAddr addr, u8 data) override;
         bool Write16(VAddr addr, u16 data) override;
         bool Write32(VAddr addr, u32 data) override;
         bool Write64(VAddr addr, u64 data) override;
 
-        bool WriteBlock(VAddr dest_addr, const void* src_buffer, size_t size) override;
+        bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) override;
 
         std::unordered_map<VAddr, u8> data;
     };
diff --git a/src/tests/glad.cpp b/src/tests/glad.cpp
deleted file mode 100644
index 1797c0e3d..000000000
--- a/src/tests/glad.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2016 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <catch2/catch.hpp>
-#include <glad/glad.h>
-
-// This is not an actual test, but a work-around for issue #2183.
-// If tests uses functions in core but doesn't explicitly use functions in glad, the linker of macOS
-// will error about undefined references from video_core to glad. So we explicitly use a glad
-// function here to shut up the linker.
-TEST_CASE("glad fake test", "[dummy]") {
-    REQUIRE(&gladLoadGL != nullptr);
-}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 65b5f57c3..f5ae57039 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,6 +5,8 @@ add_library(video_core STATIC
     debug_utils/debug_utils.h
     engines/fermi_2d.cpp
     engines/fermi_2d.h
+    engines/kepler_memory.cpp
+    engines/kepler_memory.h
     engines/maxwell_3d.cpp
     engines/maxwell_3d.h
     engines/maxwell_compute.cpp
@@ -12,6 +14,7 @@ add_library(video_core STATIC
     engines/maxwell_dma.cpp
     engines/maxwell_dma.h
     engines/shader_bytecode.h
+    engines/shader_header.h
     gpu.cpp
     gpu.h
     macro_interpreter.cpp
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 2625ddfdc..f1aa6091b 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -14,6 +14,7 @@
 #include "core/tracer/recorder.h"
 #include "video_core/command_processor.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_compute.h"
 #include "video_core/engines/maxwell_dma.h"
@@ -69,6 +70,9 @@ void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
         case EngineID::MAXWELL_DMA_COPY_A:
             maxwell_dma->WriteReg(method, value);
             break;
+        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+            kepler_memory->WriteReg(method, value);
+            break;
         default:
             UNIMPLEMENTED_MSG("Unimplemented engine");
         }
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index dcf9ef8b9..021b83eaa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -26,7 +26,7 @@ public:
     void WriteReg(u32 method, u32 value);
 
     struct Regs {
-        static constexpr size_t NUM_REGS = 0x258;
+        static constexpr std::size_t NUM_REGS = 0x258;
 
         struct Surface {
             RenderTargetFormat format;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
new file mode 100644
index 000000000..66ae6332d
--- /dev/null
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -0,0 +1,56 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_memory.h"
+
+namespace Tegra::Engines {
+
+KeplerMemory::KeplerMemory(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+KeplerMemory::~KeplerMemory() = default;
+
+// Stores the value in the register array, then handles the registers that have side effects.
+void KeplerMemory::WriteReg(u32 method, u32 value) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid KeplerMemory register, increase the size of the Regs structure");
+
+    regs.reg_array[method] = value;
+
+    switch (method) {
+    case KEPLERMEMORY_REG_INDEX(exec): {
+        // Reset the word offset so the next data write starts at the destination base.
+        state.write_offset = 0;
+        break;
+    }
+    case KEPLERMEMORY_REG_INDEX(data): {
+        ProcessData(value);
+        break;
+    }
+    default:
+        // All other registers only need their value stored.
+        break;
+    }
+}
+
+// Writes one 32-bit word at the destination address plus the current word offset, then advances
+// the offset by one word.
+void KeplerMemory::ProcessData(u32 data) {
+    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
+    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
+
+    const GPUVAddr address = regs.dest.Address();
+    const auto dest_address =
+        memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
+
+    // Verify the translation before dereferencing it so a failed lookup is reported by the
+    // assertion instead of being dereferenced unchecked.
+    ASSERT_MSG(dest_address, "KeplerMemory destination address could not be translated");
+
+    Memory::Write32(*dest_address, data);
+
+    state.write_offset++;
+}
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
new file mode 100644
index 000000000..b0d0078cf
--- /dev/null
+++ b/src/video_core/engines/kepler_memory.h
@@ -0,0 +1,100 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Engines {
+
+/// Index of a Regs field inside Regs::reg_array, in units of 32-bit words.
+#define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
+    (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
+
+class KeplerMemory final {
+public:
+    KeplerMemory(MemoryManager& memory_manager);
+    ~KeplerMemory();
+
+    /// Write the value to the register identified by method.
+    void WriteReg(u32 method, u32 value);
+
+    /// Register layout; the named fields and reg_array are two views of the same storage.
+    struct Regs {
+        static constexpr std::size_t NUM_REGS = 0x7F;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0x60);
+
+                u32 line_length_in;
+                u32 line_count;
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 pitch;
+                    u32 block_dimensions;
+                    u32 width;
+                    u32 height;
+                    u32 depth;
+                    u32 z;
+                    u32 x;
+                    u32 y;
+
+                    /// Joins address_high (upper 32 bits) and address_low into one GPUVAddr.
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } dest;
+
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec;
+
+                u32 data;
+
+                INSERT_PADDING_WORDS(0x11);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    // The named view must cover exactly NUM_REGS words, otherwise it and reg_array disagree.
+    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
+                  "KeplerMemory Regs has wrong size");
+
+    struct {
+        /// Number of data words written since exec was last written.
+        u32 write_offset = 0;
+    } state{};
+
+private:
+    MemoryManager& memory_manager;
+
+    /// Writes one data word to the destination and advances state.write_offset.
+    void ProcessData(u32 data);
+};
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(line_length_in, 0x60);
+ASSERT_REG_POSITION(line_count, 0x61);
+ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(exec, 0x6C);
+ASSERT_REG_POSITION(data, 0x6D);
+#undef ASSERT_REG_POSITION
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 329079ddd..8afd26fe9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -248,8 +248,8 @@ void Maxwell3D::DrawArrays() {
 
 void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
-    auto& shader = state.shader_stages[static_cast<size_t>(stage)];
-    auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)];
+    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+    auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)];
 
     auto& buffer = shader.const_buffers[bind_data.index];
 
@@ -316,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
 std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
     std::vector<Texture::FullTextureInfo> textures;
 
-    auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)];
+    auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
     auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
 
     GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
 
     // Offset into the texture constbuffer where the texture info begins.
-    static constexpr size_t TextureInfoOffset = 0x20;
+    static constexpr std::size_t TextureInfoOffset = 0x20;
 
     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
@@ -360,8 +360,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     return textures;
 }
 
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
-    auto& shader = state.shader_stages[static_cast<size_t>(stage)];
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
+                                                    std::size_t offset) const {
+    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
     auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
 
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d3be900a4..9f5581045 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -34,17 +34,18 @@ public:
     /// Register structure of the Maxwell3D engine.
     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
     struct Regs {
-        static constexpr size_t NUM_REGS = 0xE00;
-
-        static constexpr size_t NumRenderTargets = 8;
-        static constexpr size_t NumViewports = 16;
-        static constexpr size_t NumCBData = 16;
-        static constexpr size_t NumVertexArrays = 32;
-        static constexpr size_t NumVertexAttributes = 32;
-        static constexpr size_t MaxShaderProgram = 6;
-        static constexpr size_t MaxShaderStage = 5;
+        static constexpr std::size_t NUM_REGS = 0xE00;
+
+        static constexpr std::size_t NumRenderTargets = 8;
+        static constexpr std::size_t NumViewports = 16;
+        static constexpr std::size_t NumCBData = 16;
+        static constexpr std::size_t NumVertexArrays = 32;
+        static constexpr std::size_t NumVertexAttributes = 32;
+        static constexpr std::size_t NumTextureSamplers = 32;
+        static constexpr std::size_t MaxShaderProgram = 6;
+        static constexpr std::size_t MaxShaderStage = 5;
         // Maximum number of const buffers per shader stage.
-        static constexpr size_t MaxConstBuffers = 18;
+        static constexpr std::size_t MaxConstBuffers = 18;
 
         enum class QueryMode : u32 {
             Write = 0,
@@ -443,9 +444,9 @@ public:
             }
         };
 
-        bool IsShaderConfigEnabled(size_t index) const {
+        bool IsShaderConfigEnabled(std::size_t index) const {
             // The VertexB is always enabled.
-            if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) {
+            if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
                 return true;
             }
             return shader_config[index].enable != 0;
@@ -461,7 +462,11 @@ public:
                     u32 entry;
                 } macros;
 
-                INSERT_PADDING_WORDS(0x1B8);
+                INSERT_PADDING_WORDS(0x189);
+
+                u32 tfb_enabled;
+
+                INSERT_PADDING_WORDS(0x2E);
 
                 RenderTargetConfig rt[NumRenderTargets];
 
@@ -571,7 +576,7 @@ public:
                         BitField<25, 3, u32> map_7;
                     };
 
-                    u32 GetMap(size_t index) const {
+                    u32 GetMap(std::size_t index) const {
                         const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
                                                                      map_4, map_5, map_6, map_7};
                         ASSERT(index < maps.size());
@@ -594,7 +599,9 @@ public:
 
                 u32 depth_write_enabled;
 
-                INSERT_PADDING_WORDS(0x7);
+                u32 alpha_test_enabled;
+
+                INSERT_PADDING_WORDS(0x6);
 
                 u32 d3d_cull_mode;
 
@@ -635,7 +642,11 @@ public:
 
                 u32 vb_element_base;
 
-                INSERT_PADDING_WORDS(0x40);
+                INSERT_PADDING_WORDS(0x38);
+
+                float point_size;
+
+                INSERT_PADDING_WORDS(0x7);
 
                 u32 zeta_enable;
 
@@ -925,7 +936,7 @@ public:
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 
     /// Returns the texture information for a specific texture in a specific shader stage.
-    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
+    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
 
 private:
     VideoCore::RasterizerInterface& rasterizer;
@@ -977,6 +988,7 @@ private:
                   "Field " #field_name " has invalid position")
 
 ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform[0], 0x280);
 ASSERT_REG_POSITION(viewport, 0x300);
@@ -996,6 +1008,7 @@ ASSERT_REG_POSITION(zeta_height, 0x48b);
 ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
 ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
 ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
+ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
 ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
 ASSERT_REG_POSITION(depth_test_func, 0x4C3);
 ASSERT_REG_POSITION(blend, 0x4CF);
@@ -1009,6 +1022,7 @@ ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6);
 ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
 ASSERT_REG_POSITION(screen_y_control, 0x4EB);
 ASSERT_REG_POSITION(vb_element_base, 0x50D);
+ASSERT_REG_POSITION(point_size, 0x546);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(tic, 0x55D);
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
index e4e5f9e5e..59e28b22d 100644
--- a/src/video_core/engines/maxwell_compute.cpp
+++ b/src/video_core/engines/maxwell_compute.cpp
@@ -2,12 +2,31 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/logging/log.h"
+#include "core/core.h"
 #include "video_core/engines/maxwell_compute.h"
 
 namespace Tegra {
 namespace Engines {
 
-void MaxwellCompute::WriteReg(u32 method, u32 value) {}
+// Stores the value in the register array. A write to the compute register is logged as critical
+// and then reaches UNREACHABLE(); every other register is only stored.
+void MaxwellCompute::WriteReg(u32 method, u32 value) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid MaxwellCompute register, increase the size of the Regs structure");
+
+    regs.reg_array[method] = value;
+
+    switch (method) {
+    case MAXWELL_COMPUTE_REG_INDEX(compute): {
+        LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
+        UNREACHABLE();
+        break;
+    }
+    default:
+        break;
+    }
+}
 
 } // namespace Engines
 } // namespace Tegra
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/maxwell_compute.h
index 2b3e4ced6..6ea934fb9 100644
--- a/src/video_core/engines/maxwell_compute.h
+++ b/src/video_core/engines/maxwell_compute.h
@@ -4,17 +4,56 @@
 
 #pragma once
 
+#include <array>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 
 namespace Tegra::Engines {
 
+/// Index of a Regs field inside Regs::reg_array, in units of 32-bit words.
+#define MAXWELL_COMPUTE_REG_INDEX(field_name)                                                      \
+    (offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32))
+
 class MaxwellCompute final {
 public:
     MaxwellCompute() = default;
     ~MaxwellCompute() = default;
 
+    /// Register layout; the named fields and reg_array are two views of the same storage.
+    struct Regs {
+        static constexpr std::size_t NUM_REGS = 0xCF8;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0x281);
+
+                /// Register at word 0x281; WriteReg reports writes to it as unimplemented.
+                union {
+                    u32 compute_end;
+                    BitField<0, 1, u32> unknown;
+                } compute;
+
+                INSERT_PADDING_WORDS(0xA76);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
+                  "MaxwellCompute Regs has wrong size");
+
     /// Write the value to the register identified by method.
     void WriteReg(u32 method, u32 value);
 };
 
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4,                      \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(compute, 0x281);
+
+#undef ASSERT_REG_POSITION
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c24d33d5c..aa7481b8c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -50,7 +50,7 @@ void MaxwellDMA::HandleCopy() {
     ASSERT(regs.dst_params.pos_y == 0);
 
     if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
-        size_t copy_size = regs.x_count;
+        std::size_t copy_size = regs.x_count;
 
         // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
         // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 7882f16e0..311ccb616 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -23,7 +23,7 @@ public:
     void WriteReg(u32 method, u32 value);
 
     struct Regs {
-        static constexpr size_t NUM_REGS = 0x1D6;
+        static constexpr std::size_t NUM_REGS = 0x1D6;
 
         struct Parameters {
             union {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 58f2904ce..b1f137b9c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -5,9 +5,8 @@
 #pragma once
 
 #include <bitset>
-#include <cstring>
-#include <map>
 #include <string>
+#include <tuple>
 #include <vector>
 
 #include <boost/optional.hpp>
@@ -20,10 +19,10 @@ namespace Tegra::Shader {
 
 struct Register {
     /// Number of registers
-    static constexpr size_t NumRegisters = 256;
+    static constexpr std::size_t NumRegisters = 256;
 
     /// Register 255 is special cased to always be 0
-    static constexpr size_t ZeroIndex = 255;
+    static constexpr std::size_t ZeroIndex = 255;
 
     enum class Size : u64 {
         Byte = 0,
@@ -67,6 +66,13 @@ private:
     u64 value{};
 };
 
+enum class AttributeSize : u64 {
+    Word = 0,
+    DoubleWord = 1,
+    TripleWord = 2,
+    QuadWord = 3,
+};
+
 union Attribute {
     Attribute() = default;
 
@@ -87,9 +93,10 @@ union Attribute {
     };
 
     union {
+        BitField<20, 10, u64> immediate;
         BitField<22, 2, u64> element;
         BitField<24, 6, Index> index;
-        BitField<47, 3, u64> size;
+        BitField<47, 3, AttributeSize> size;
     } fmt20;
 
     union {
@@ -232,6 +239,41 @@ enum class FlowCondition : u64 {
     Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
 };
 
+enum class ControlCode : u64 {
+    F = 0,
+    LT = 1,
+    EQ = 2,
+    LE = 3,
+    GT = 4,
+    NE = 5,
+    GE = 6,
+    Num = 7,
+    Nan = 8,
+    LTU = 9,
+    EQU = 10,
+    LEU = 11,
+    GTU = 12,
+    NEU = 13,
+    GEU = 14,
+    // Value 15 has no named entry in this enum.
+    OFF = 16,
+    LO = 17,
+    SFF = 18,
+    LS = 19,
+    HI = 20,
+    SFT = 21,
+    HS = 22,
+    OFT = 23,
+    CSM_TA = 24,
+    CSM_TR = 25,
+    CSM_MX = 26,
+    FCSM_TA = 27,
+    FCSM_TR = 28,
+    FCSM_MX = 29,
+    RLE = 30,
+    RGT = 31,
+};
+
 enum class PredicateResultMode : u64 {
     None = 0x0,
     NotZero = 0x3,
@@ -263,17 +305,38 @@ enum class TextureProcessMode : u64 {
     LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL
 };
 
-enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
-enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
+enum class TextureMiscMode : u64 {
+    DC,
+    AOFFI, // Uses Offset
+    NDV,
+    NODEP,
+    MZ,
+    PTP,
+};
+
+enum class IpaInterpMode : u64 {
+    Linear = 0,
+    Perspective = 1,
+    Flat = 2,
+    Sc = 3,
+};
+
+enum class IpaSampleMode : u64 {
+    Default = 0,
+    Centroid = 1,
+    Offset = 2,
+};
 
 struct IpaMode {
     IpaInterpMode interpolation_mode;
     IpaSampleMode sampling_mode;
-    inline bool operator==(const IpaMode& a) {
-        return (a.interpolation_mode == interpolation_mode) && (a.sampling_mode == sampling_mode);
+
+    bool operator==(const IpaMode& a) const {
+        return std::tie(interpolation_mode, sampling_mode) ==
+               std::tie(a.interpolation_mode, a.sampling_mode);
     }
-    inline bool operator!=(const IpaMode& a) {
-        return !((*this) == a);
+    bool operator!=(const IpaMode& a) const {
+        return !operator==(a);
     }
 };
 
@@ -538,6 +601,15 @@ union Instruction {
     } pset;
 
     union {
+        BitField<0, 3, u64> pred0;
+        BitField<3, 3, u64> pred3;
+        BitField<8, 5, ControlCode> cc; // flag in cc
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred39;
+        BitField<45, 4, PredOperation> op; // op with pred39
+    } csetp;
+
+    union {
         BitField<39, 3, u64> pred39;
         BitField<42, 1, u64> neg_pred;
         BitField<43, 1, u64> neg_a;
@@ -582,42 +654,127 @@ union Instruction {
         BitField<28, 1, u64> array;
         BitField<29, 2, TextureType> texture_type;
         BitField<31, 4, u64> component_mask;
+        BitField<49, 1, u64> nodep_flag;
+        BitField<50, 1, u64> dc_flag;
+        BitField<54, 1, u64> aoffi_flag;
         BitField<55, 3, TextureProcessMode> process_mode;
 
-        bool IsComponentEnabled(size_t component) const {
+        bool IsComponentEnabled(std::size_t component) const {
             return ((1ull << component) & component_mask) != 0;
         }
+
+        TextureProcessMode GetTextureProcessMode() const {
+            return process_mode;
+        }
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::DC:
+                return dc_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::AOFFI:
+                return aoffi_flag != 0;
+            default:
+                break;
+            }
+            return false;
+        }
     } tex;
 
     union {
         BitField<22, 6, TextureQueryType> query_type;
         BitField<31, 4, u64> component_mask;
+        BitField<49, 1, u64> nodep_flag;
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            default:
+                break;
+            }
+            return false;
+        }
     } txq;
 
     union {
         BitField<28, 1, u64> array;
         BitField<29, 2, TextureType> texture_type;
         BitField<31, 4, u64> component_mask;
+        BitField<35, 1, u64> ndv_flag;
+        BitField<49, 1, u64> nodep_flag;
 
-        bool IsComponentEnabled(size_t component) const {
+        bool IsComponentEnabled(std::size_t component) const {
             return ((1ull << component) & component_mask) != 0;
         }
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::NDV:
+                return (ndv_flag != 0);
+            case TextureMiscMode::NODEP:
+                return (nodep_flag != 0);
+            default:
+                break;
+            }
+            return false;
+        }
     } tmml;
 
     union {
         BitField<28, 1, u64> array;
         BitField<29, 2, TextureType> texture_type;
+        BitField<35, 1, u64> ndv_flag;
+        BitField<49, 1, u64> nodep_flag;
+        BitField<50, 1, u64> dc_flag;
+        BitField<54, 2, u64> info;
         BitField<56, 2, u64> component;
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::NDV:
+                return ndv_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::DC:
+                return dc_flag != 0;
+            case TextureMiscMode::AOFFI:
+                return info == 1;
+            case TextureMiscMode::PTP:
+                return info == 2;
+            default:
+                break;
+            }
+            return false;
+        }
     } tld4;
 
     union {
+        BitField<49, 1, u64> nodep_flag;
+        BitField<50, 1, u64> dc_flag;
+        BitField<51, 1, u64> aoffi_flag;
         BitField<52, 2, u64> component;
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::DC:
+                return dc_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::AOFFI:
+                return aoffi_flag != 0;
+            default:
+                break;
+            }
+            return false;
+        }
     } tld4s;
 
     union {
         BitField<0, 8, Register> gpr0;
         BitField<28, 8, Register> gpr28;
-        BitField<49, 1, u64> nodep;
+        BitField<49, 1, u64> nodep_flag;
         BitField<50, 3, u64> component_mask_selector;
         BitField<53, 4, u64> texture_info;
 
@@ -637,6 +794,37 @@ union Instruction {
             UNREACHABLE();
         }
 
+        TextureProcessMode GetTextureProcessMode() const {
+            // texture_info is a 4-bit selector; only the values listed below pick a mode.
+            switch (texture_info) {
+            case 3:
+            case 5:
+            case 13:
+                return TextureProcessMode::LL;
+            case 0:
+            case 2:
+            case 6:
+            case 8:
+            case 9:
+            case 11:
+                return TextureProcessMode::LZ;
+            default:
+                return TextureProcessMode::None;
+            }
+        }
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            // DC is implied by the texture_info encoding; NODEP has its own flag bit.
+            switch (mode) {
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::DC:
+                return texture_info == 9 || (texture_info >= 4 && texture_info <= 6);
+            default:
+                return false;
+            }
+        }
+
         bool IsArrayTexture() const {
             // TEXS only supports Texture2D arrays.
             return texture_info >= 7 && texture_info <= 9;
@@ -646,7 +834,7 @@ union Instruction {
             return gpr28.Value() != Register::ZeroIndex;
         }
 
-        bool IsComponentEnabled(size_t component) const {
+        bool IsComponentEnabled(std::size_t component) const {
             static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
                 {},
                 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@@ -654,7 +842,7 @@ union Instruction {
                 {0x7, 0xb, 0xd, 0xe, 0xf},
             }};
 
-            size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
+            std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
             index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
 
             u32 mask = mask_lut[index][component_mask_selector];
@@ -665,6 +853,7 @@ union Instruction {
     } texs;
 
     union {
+        BitField<49, 1, u64> nodep_flag;
         BitField<53, 4, u64> texture_info;
 
         TextureType GetTextureType() const {
@@ -685,6 +874,26 @@ union Instruction {
             UNREACHABLE();
         }
 
+        TextureProcessMode GetTextureProcessMode() const {
+            // Encodings 1, 5 and 12 select LL; every other encoding falls back to LZ.
+            const bool is_ll = texture_info == 1 || texture_info == 5 || texture_info == 12;
+            return is_ll ? TextureProcessMode::LL : TextureProcessMode::LZ;
+        }
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            // AOFFI and MZ are implied by the texture_info encoding; NODEP has its own flag bit.
+            switch (mode) {
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::MZ:
+                return texture_info == 5;
+            case TextureMiscMode::AOFFI:
+                return texture_info == 4 || texture_info == 12;
+            default:
+                return false;
+            }
+        }
+
         bool IsArrayTexture() const {
             // TEXS only supports Texture2D arrays.
             return texture_info == 8;
@@ -727,6 +936,7 @@ union Instruction {
         BitField<36, 5, u64> index;
     } cbuf36;
 
+    BitField<47, 1, u64> generates_cc;
     BitField<61, 1, u64> is_b_imm;
     BitField<60, 1, u64> is_b_gpr;
     BitField<59, 1, u64> is_c_gpr;
@@ -851,6 +1061,7 @@ public:
         ISET_IMM,
         PSETP,
         PSET,
+        CSETP,
         XMAD_IMM,
         XMAD_CR,
         XMAD_RC,
@@ -939,7 +1150,7 @@ public:
 private:
     struct Detail {
     private:
-        static constexpr size_t opcode_bitsize = 16;
+        static constexpr std::size_t opcode_bitsize = 16;
 
         /**
          * Generates the mask and the expected value after masking from a given bitstring.
@@ -948,8 +1159,8 @@ private:
          */
         static auto GetMaskAndExpect(const char* const bitstring) {
             u16 mask = 0, expect = 0;
-            for (size_t i = 0; i < opcode_bitsize; i++) {
-                const size_t bit_position = opcode_bitsize - i - 1;
+            for (std::size_t i = 0; i < opcode_bitsize; i++) {
+                const std::size_t bit_position = opcode_bitsize - i - 1;
                 switch (bitstring[i]) {
                 case '0':
                     mask |= 1 << bit_position;
@@ -1087,6 +1298,7 @@ private:
             INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
             INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
             INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
+            INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
             INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
             INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
             INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
new file mode 100644
index 000000000..a885ee3cf
--- /dev/null
+++ b/src/video_core/engines/shader_header.h
@@ -0,0 +1,103 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra::Shader {
+
+enum class OutputTopology : u32 {
+    PointList = 1,
+    LineStrip = 6,
+    TriangleStrip = 7,
+};
+
+// Documentation in:
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
+struct Header {
+    union {
+        BitField<0, 5, u32> sph_type;
+        BitField<5, 5, u32> version;
+        BitField<10, 4, u32> shader_type;
+        BitField<14, 1, u32> mrt_enable;
+        BitField<15, 1, u32> kills_pixels;
+        BitField<16, 1, u32> does_global_store;
+        BitField<17, 4, u32> sass_version;
+        BitField<21, 5, u32> reserved;
+        BitField<26, 1, u32> does_load_or_store;
+        BitField<27, 1, u32> does_fp64;
+        BitField<28, 4, u32> stream_out_mask;
+    } common0;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_low_size;
+        BitField<24, 8, u32> per_patch_attribute_count;
+    } common1;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_high_size;
+        BitField<24, 8, u32> threads_per_input_primitive;
+    } common2;
+
+    union {
+        BitField<0, 24, u32> shader_local_memory_crs_size;
+        BitField<24, 4, OutputTopology> output_topology;
+        BitField<28, 4, u32> reserved;
+    } common3;
+
+    union { // Fields tile the 32-bit word without overlap: 12 + 8 + 4 + 8 bits.
+        BitField<0, 12, u32> max_output_vertices;
+        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
+        BitField<20, 4, u32> reserved;
+        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+    } common4;
+
+    union {
+        struct {
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES(5);  // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(1);  // ImapReserved
+            INSERT_PADDING_BYTES(3);  // OmapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // OmapSystemValuesB
+            INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // OmapColor
+            INSERT_PADDING_BYTES(2);  // OmapSystemValuesC
+            INSERT_PADDING_BYTES(5);  // OmapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(1);  // OmapReserved
+        } vtg;
+
+        struct {
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(2);  // ImapReserved
+            struct {
+                u32 target;
+                union {
+                    BitField<0, 1, u32> sample_mask;
+                    BitField<1, 1, u32> depth;
+                    BitField<2, 30, u32> reserved;
+                };
+            } omap;
+            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
+                const u32 bit = render_target * 4 + component; // 4 mask bits per render target
+                return (omap.target & (1U << bit)) != 0;       // unsigned shift: safe at bit 31
+            }
+        } ps;
+    };
+};
+
+static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
+
+} // namespace Tegra::Shader
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 86a809f86..baa8b63b7 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -4,6 +4,7 @@
 
 #include "common/assert.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_compute.h"
 #include "video_core/engines/maxwell_dma.h"
@@ -27,6 +28,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
     fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
     maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(*memory_manager);
 }
 
 GPU::~GPU() = default;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 589a59b4f..5cc1e19ca 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 {
     R32_UINT = 0xE4,
     R32_FLOAT = 0xE5,
     B5G6R5_UNORM = 0xE8,
+    BGR5A1_UNORM = 0xE9,
     RG8_UNORM = 0xEA,
     RG8_SNORM = 0xEB,
     R16_UNORM = 0xEE,
@@ -102,6 +103,7 @@ class Fermi2D;
 class Maxwell3D;
 class MaxwellCompute;
 class MaxwellDMA;
+class KeplerMemory;
 } // namespace Engines
 
 enum class EngineID {
@@ -146,6 +148,8 @@ private:
     std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
     /// DMA engine
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+    /// Inline memory engine
+    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 7d836b816..cee0baaf3 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -152,7 +152,7 @@ private:
     boost::optional<u32>
         delayed_pc; ///< Program counter to execute at after the delay slot is executed.
 
-    static constexpr size_t NumMacroRegisters = 8;
+    static constexpr std::size_t NumMacroRegisters = 8;
 
     /// General purpose macro registers.
     std::array<u32, NumMacroRegisters> registers = {};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0b5d18bcb..578aca789 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -12,10 +12,10 @@
 
 namespace OpenGL {
 
-OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
+OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
 
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment,
-                                      bool cache) {
+GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
+                                      std::size_t alignment, bool cache) {
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
     const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
 
@@ -53,7 +53,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, siz
     return uploaded_offset;
 }
 
-GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) {
+GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size,
+                                          std::size_t alignment) {
     AlignBuffer(alignment);
     std::memcpy(buffer_ptr, raw_pointer, size);
     GLintptr uploaded_offset = buffer_offset;
@@ -63,7 +64,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size,
     return uploaded_offset;
 }
 
-void OGLBufferCache::Map(size_t max_size) {
+void OGLBufferCache::Map(std::size_t max_size) {
     bool invalidate;
     std::tie(buffer_ptr, buffer_offset_base, invalidate) =
         stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -81,10 +82,10 @@ GLuint OGLBufferCache::GetHandle() const {
     return stream_buffer.GetHandle();
 }
 
-void OGLBufferCache::AlignBuffer(size_t alignment) {
+void OGLBufferCache::AlignBuffer(std::size_t alignment) {
     // Align the offset, not the mapped pointer
     GLintptr offset_aligned =
-        static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
+        static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
     buffer_ptr += offset_aligned - buffer_offset;
     buffer_offset = offset_aligned;
 }
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 6da862902..6c18461f4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -19,32 +19,32 @@ struct CachedBufferEntry final {
         return addr;
     }
 
-    size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const {
         return size;
     }
 
     VAddr addr;
-    size_t size;
+    std::size_t size;
     GLintptr offset;
-    size_t alignment;
+    std::size_t alignment;
 };
 
 class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
 public:
-    explicit OGLBufferCache(size_t size);
+    explicit OGLBufferCache(std::size_t size);
 
-    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4,
+    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                           bool cache = true);
 
-    GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4);
+    GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
 
-    void Map(size_t max_size);
+    void Map(std::size_t max_size);
     void Unmap();
 
     GLuint GetHandle() const;
 
 protected:
-    void AlignBuffer(size_t alignment);
+    void AlignBuffer(std::size_t alignment);
 
 private:
     OGLStreamBuffer stream_buffer;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7e1bba67d..1fcd13f04 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -46,7 +46,7 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100,
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
     : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
     // Create sampler objects
-    for (size_t i = 0; i < texture_samplers.size(); ++i) {
+    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
         state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
     }
@@ -181,7 +181,7 @@ void RasterizerOpenGL::SetupShaders() {
     u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
     u32 current_texture_bindpoint = 0;
 
-    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
 
@@ -190,12 +190,12 @@ void RasterizerOpenGL::SetupShaders() {
             continue;
         }
 
-        const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
+        const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
 
         GLShader::MaxwellUniformData ubo{};
         ubo.SetFromRegs(gpu.state.shader_stages[stage]);
         const GLintptr offset = buffer_cache.UploadHostMemory(
-            &ubo, sizeof(ubo), static_cast<size_t>(uniform_buffer_alignment));
+            &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
 
         // Bind the buffer
         glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo));
@@ -238,10 +238,10 @@ void RasterizerOpenGL::SetupShaders() {
     shader_program_manager->UseTrivialGeometryShader();
 }
 
-size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
+std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
-    size_t size = 0;
+    std::size_t size = 0;
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
         if (!regs.vertex_array[index].IsEnabled())
             continue;
@@ -299,7 +299,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
 
 void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
                                              bool preserve_contents,
-                                             boost::optional<size_t> single_color_target) {
+                                             boost::optional<std::size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
@@ -330,7 +330,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
         } else {
             // Multiple color attachments are enabled
             std::array<GLenum, Maxwell::NumRenderTargets> buffers;
-            for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+            for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
                 Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
                 buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
                 glFramebufferTexture2D(
@@ -342,7 +342,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
         }
     } else {
         // No color attachments are enabled - zero out all of them
-        for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+        for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
                                    GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
                                    0, 0);
@@ -383,7 +383,7 @@ void RasterizerOpenGL::Clear() {
     bool use_stencil{};
 
     OpenGLState clear_state;
-    clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer;
+    clear_state.draw.draw_framebuffer = framebuffer.handle;
     clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
     clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
     clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
@@ -450,6 +450,9 @@ void RasterizerOpenGL::DrawArrays() {
     SyncBlendState();
     SyncLogicOpState();
     SyncCullMode();
+    SyncAlphaTest();
+    SyncTransformFeedback();
+    SyncPointState();
 
     // TODO(bunnei): Sync framebuffer_scale uniform here
     // TODO(bunnei): Sync scissorbox uniform(s) here
@@ -462,15 +465,15 @@ void RasterizerOpenGL::DrawArrays() {
     state.draw.vertex_buffer = buffer_cache.GetHandle();
     state.Apply();
 
-    size_t buffer_size = CalculateVertexArraysSize();
+    std::size_t buffer_size = CalculateVertexArraysSize();
 
     if (is_indexed) {
-        buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;
+        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
     }
 
     // Uniform space for the 5 shader stages
     buffer_size =
-        Common::AlignUp<size_t>(buffer_size, 4) +
+        Common::AlignUp<std::size_t>(buffer_size, 4) +
         (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
 
     // Add space for at least 18 constant buffers
@@ -484,8 +487,13 @@ void RasterizerOpenGL::DrawArrays() {
     GLintptr index_buffer_offset = 0;
     if (is_indexed) {
         MICROPROFILE_SCOPE(OpenGL_Index);
-        index_buffer_offset =
-            buffer_cache.UploadMemory(regs.index_array.StartAddress(), index_buffer_size);
+
+        // Offset the start address so the upload begins at the first desired index.
+        auto index_start = regs.index_array.StartAddress();
+        index_start += static_cast<std::size_t>(regs.index_array.first) *
+                       static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+
+        index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size);
     }
 
     SetupShaders();
@@ -499,10 +507,6 @@ void RasterizerOpenGL::DrawArrays() {
     if (is_indexed) {
         const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
 
-        // Adjust the index buffer offset so it points to the first desired index.
-        index_buffer_offset += static_cast<GLintptr>(regs.index_array.first) *
-                               static_cast<GLintptr>(regs.index_array.FormatSizeInBytes());
-
         if (gpu.state.current_instance > 0) {
             glDrawElementsInstancedBaseVertexBaseInstance(
                 primitive_mode, regs.index_array.count,
@@ -644,7 +648,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
     MICROPROFILE_SCOPE(OpenGL_UBO);
     const auto& gpu = Core::System::GetInstance().GPU();
     const auto& maxwell3d = gpu.Maxwell3D();
-    const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
+    const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
     const auto& entries = shader->GetShaderEntries().const_buffer_entries;
 
     constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
@@ -667,7 +671,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
             continue;
         }
 
-        size_t size = 0;
+        std::size_t size = 0;
 
         if (used_buffer.IsIndirect()) {
             // Buffer is accessed indirectly, so upload the entire thing
@@ -689,7 +693,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
         ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
 
         GLintptr const_buffer_offset = buffer_cache.UploadMemory(
-            buffer.address, size, static_cast<size_t>(uniform_buffer_alignment));
+            buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
 
         // Now configure the bindpoint of the buffer inside the shader
         glUniformBlockBinding(shader->GetProgramHandle(),
@@ -882,4 +886,30 @@ void RasterizerOpenGL::SyncLogicOpState() {
     state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation);
 }
 
+void RasterizerOpenGL::SyncAlphaTest() {
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+
+    // TODO(Rodrigo): Alpha testing is a legacy OpenGL feature, but it can be implemented with a
+    // test+discard in fragment shaders. Until then, enabling it is logged as critical.
+    if (regs.alpha_test_enabled != 0) {
+        LOG_CRITICAL(Render_OpenGL, "Alpha testing is not implemented");
+        UNREACHABLE();
+    }
+}
+
+void RasterizerOpenGL::SyncTransformFeedback() {
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    // Nothing is synced yet: a guest enabling transform feedback is logged as critical.
+    if (regs.tfb_enabled != 0) {
+        LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
+        UNREACHABLE();
+    }
+}
+
+void RasterizerOpenGL::SyncPointState() {
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    // Copy the guest point size register into state.point.size.
+    state.point.size = regs.point_size;
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 163412882..4c8ecbd1c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -73,7 +73,7 @@ public:
     };
 
     /// Maximum supported size that a constbuffer can have in bytes.
-    static constexpr size_t MaxConstbufferSize = 0x10000;
+    static constexpr std::size_t MaxConstbufferSize = 0x10000;
     static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
                   "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
 
@@ -106,7 +106,7 @@ private:
      */
     void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
                                bool preserve_contents = true,
-                               boost::optional<size_t> single_color_target = {});
+                               boost::optional<std::size_t> single_color_target = {});
 
     /*
      * Configures the current constbuffers to use for the draw command.
@@ -158,6 +158,15 @@ private:
     /// Syncs the LogicOp state to match the guest state
     void SyncLogicOpState();
 
+    /// Syncs the alpha test state to match the guest state
+    void SyncAlphaTest();
+
+    /// Syncs the transform feedback state to match the guest state
+    void SyncTransformFeedback();
+
+    /// Syncs the point state to match the guest state
+    void SyncPointState();
+
     bool has_ARB_direct_state_access = false;
     bool has_ARB_multi_bind = false;
     bool has_ARB_separate_shader_objects = false;
@@ -178,14 +187,14 @@ private:
              OGLVertexArray>
         vertex_array_cache;
 
-    std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
+    std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
 
-    static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+    static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
     OGLBufferCache buffer_cache;
     OGLFramebuffer framebuffer;
     GLint uniform_buffer_alignment;
 
-    size_t CalculateVertexArraysSize() const;
+    std::size_t CalculateVertexArraysSize() const;
 
     void SetupVertexArrays();
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 32001e44b..24a540258 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -75,7 +75,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     return params;
 }
 
-/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) {
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
     const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
     SurfaceParams params{};
     params.addr = TryGetCpuAddr(config.Address());
@@ -141,8 +141,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
      true}, // BC7U
     {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8,
-     ComponentType::UNorm, true}, // BC6H_UF16
-    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+     ComponentType::Float, true}, // BC6H_UF16
+    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
      true},                                                                    // BC6H_SF16
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_4X4
     {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},            // G8R8U
@@ -167,6 +167,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false},                                // RG8S
     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // RG32UI
     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // R32UI
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
 
     // Depth formats
     {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -203,7 +204,7 @@ static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
 }
 
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
-    ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
+    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
     auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
     ASSERT(component_type == format.component_type);
 
@@ -213,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
 static bool IsPixelFormatASTC(PixelFormat format) {
     switch (format) {
     case PixelFormat::ASTC_2D_4X4:
+    case PixelFormat::ASTC_2D_8X8:
         return true;
     default:
         return false;
@@ -223,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
     switch (format) {
     case PixelFormat::ASTC_2D_4X4:
         return {4, 4};
+    case PixelFormat::ASTC_2D_8X8:
+        return {8, 8};
     default:
         LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
         UNREACHABLE();
@@ -256,7 +260,7 @@ static bool IsFormatBCn(PixelFormat format) {
 }
 
 template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size,
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size,
                 VAddr addr) {
     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
@@ -267,7 +271,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t
         const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
         const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
             addr, tile_size, bytes_per_pixel, stride, height, block_height);
-        const size_t size_to_copy{std::min(gl_buffer_size, data.size())};
+        const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
         memcpy(gl_buffer, data.data(), size_to_copy);
     } else {
         // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
@@ -278,7 +282,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t
     }
 }
 
-static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
                             SurfaceParams::MaxPixelFormat>
     morton_to_gl_fns = {
         // clang-format off
@@ -327,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
         MortonCopy<true, PixelFormat::RG8S>,
         MortonCopy<true, PixelFormat::RG32UI>,
         MortonCopy<true, PixelFormat::R32UI>,
+        MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
         MortonCopy<true, PixelFormat::Z32F>,
         MortonCopy<true, PixelFormat::Z16>,
         MortonCopy<true, PixelFormat::Z24S8>,
@@ -335,7 +340,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
         // clang-format on
 };
 
-static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
                             SurfaceParams::MaxPixelFormat>
     gl_to_morton_fns = {
         // clang-format off
@@ -386,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
         MortonCopy<false, PixelFormat::RG8S>,
         MortonCopy<false, PixelFormat::RG32UI>,
         MortonCopy<false, PixelFormat::R32UI>,
+        nullptr,
         MortonCopy<false, PixelFormat::Z32F>,
         MortonCopy<false, PixelFormat::Z16>,
         MortonCopy<false, PixelFormat::Z24S8>,
@@ -495,6 +501,9 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+    VideoCore::LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
+                             SurfaceParams::SurfaceTargetName(params.target));
 }
 
 static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
@@ -513,9 +522,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
     S8Z24 input_pixel{};
     Z24S8 output_pixel{};
     constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
-    for (size_t y = 0; y < height; ++y) {
-        for (size_t x = 0; x < width; ++x) {
-            const size_t offset{bpp * (y * width + x)};
+    for (std::size_t y = 0; y < height; ++y) {
+        for (std::size_t x = 0; x < width; ++x) {
+            const std::size_t offset{bpp * (y * width + x)};
             std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
             output_pixel.s8.Assign(input_pixel.s8);
             output_pixel.z24.Assign(input_pixel.z24);
@@ -526,9 +535,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 
 static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
     constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
-    for (size_t y = 0; y < height; ++y) {
-        for (size_t x = 0; x < width; ++x) {
-            const size_t offset{bpp * (y * width + x)};
+    for (std::size_t y = 0; y < height; ++y) {
+        for (std::size_t x = 0; x < width; ++x) {
+            const std::size_t offset{bpp * (y * width + x)};
             const u8 temp{data[offset]};
             data[offset] = data[offset + 1];
             data[offset + 1] = temp;
@@ -544,7 +553,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
 static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
                                                u32 width, u32 height) {
     switch (pixel_format) {
-    case PixelFormat::ASTC_2D_4X4: {
+    case PixelFormat::ASTC_2D_4X4:
+    case PixelFormat::ASTC_2D_8X8: {
         // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
         u32 block_width{};
         u32 block_height{};
@@ -591,13 +601,13 @@ void CachedSurface::LoadGLBuffer() {
             UNREACHABLE();
         }
 
-        gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size);
-        morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
+        gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);
+        morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
             params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
             params.addr);
     } else {
         const u8* const texture_src_data_end{texture_src_data +
-                                             (static_cast<size_t>(params.depth) * copy_size)};
+                                             (static_cast<std::size_t>(params.depth) * copy_size)};
         gl_buffer.assign(texture_src_data, texture_src_data_end);
     }
 
@@ -616,7 +626,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
 
     MICROPROFILE_SCOPE(OpenGL_TextureUL);
 
-    ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height *
+    ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height *
                                    GetGLBytesPerPixel(params.pixel_format) * params.depth);
 
     const auto& rect{params.GetRect()};
@@ -624,8 +634,9 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
     // Load data from memory to the surface
     const GLint x0 = static_cast<GLint>(rect.left);
     const GLint y0 = static_cast<GLint>(rect.bottom);
-    const size_t buffer_offset =
-        static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) *
+    const std::size_t buffer_offset =
+        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
+                                 static_cast<std::size_t>(x0)) *
         GetGLBytesPerPixel(params.pixel_format);
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -727,7 +738,7 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
     return GetSurface(depth_params, preserve_contents);
 }
 
-Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) {
+Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
     const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
 
     ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
@@ -825,7 +836,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
         auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
         auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
 
-        size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
+        std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
 
         glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
         glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
@@ -849,7 +860,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
                 LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
                                   "reinterpretation but the texture is tiled.");
             }
-            size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
+            std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
             std::vector<u8> data(remaining_size);
             Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
             glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 57ea8593b..80c5f324b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -70,19 +70,20 @@ struct SurfaceParams {
         RG8S = 42,
         RG32UI = 43,
         R32UI = 44,
+        ASTC_2D_8X8 = 45,
 
         MaxColorFormat,
 
         // Depth formats
-        Z32F = 45,
-        Z16 = 46,
+        Z32F = 46,
+        Z16 = 47,
 
         MaxDepthFormat,
 
         // DepthStencil formats
-        Z24S8 = 47,
-        S8Z24 = 48,
-        Z32FS8 = 49,
+        Z24S8 = 48,
+        S8Z24 = 49,
+        Z32FS8 = 50,
 
         MaxDepthStencilFormat,
 
@@ -90,7 +91,7 @@ struct SurfaceParams {
         Invalid = 255,
     };
 
-    static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max);
+    static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
 
     enum class ComponentType {
         Invalid = 0,
@@ -136,6 +137,27 @@ struct SurfaceParams {
         }
     }
 
+    static std::string SurfaceTargetName(SurfaceTarget target) { // Text name for `target`.
+        switch (target) { // Each handled enumerator returns its own spelling.
+        case SurfaceTarget::Texture1D:
+            return "Texture1D";
+        case SurfaceTarget::Texture2D:
+            return "Texture2D";
+        case SurfaceTarget::Texture3D:
+            return "Texture3D";
+        case SurfaceTarget::Texture1DArray:
+            return "Texture1DArray";
+        case SurfaceTarget::Texture2DArray:
+            return "Texture2DArray";
+        case SurfaceTarget::TextureCubemap:
+            return "TextureCubemap";
+        default: // Any value without a case above.
+            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+            UNREACHABLE(); // The return below still yields a name carrying the raw value.
+            return fmt::format("TextureUnknown({})", static_cast<u32>(target));
+        }
+    }
+
     /**
      * Gets the compression factor for the specified PixelFormat. This applies to just the
      * "compressed width" and "compressed height", not the overall compression factor of a
@@ -192,6 +214,7 @@ struct SurfaceParams {
             1, // RG8S
             1, // RG32UI
             1, // R32UI
+            4, // ASTC_2D_8X8
             1, // Z32F
             1, // Z16
             1, // Z24S8
@@ -199,8 +222,8 @@ struct SurfaceParams {
             1, // Z32FS8
         }};
 
-        ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
-        return compression_factor_table[static_cast<size_t>(format)];
+        ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
+        return compression_factor_table[static_cast<std::size_t>(format)];
     }
 
     static constexpr u32 GetFormatBpp(PixelFormat format) {
@@ -253,6 +276,7 @@ struct SurfaceParams {
             16,  // RG8S
             64,  // RG32UI
             32,  // R32UI
+            16,  // ASTC_2D_8X8
             32,  // Z32F
             16,  // Z16
             32,  // Z24S8
@@ -260,8 +284,8 @@ struct SurfaceParams {
             64,  // Z32FS8
         }};
 
-        ASSERT(static_cast<size_t>(format) < bpp_table.size());
-        return bpp_table[static_cast<size_t>(format)];
+        ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
+        return bpp_table[static_cast<std::size_t>(format)];
     }
 
     u32 GetFormatBpp() const {
@@ -316,6 +340,8 @@ struct SurfaceParams {
             return PixelFormat::R11FG11FB10F;
         case Tegra::RenderTargetFormat::B5G6R5_UNORM:
             return PixelFormat::B5G6R5U;
+        case Tegra::RenderTargetFormat::BGR5A1_UNORM:
+            return PixelFormat::A1B5G5R5U;
         case Tegra::RenderTargetFormat::RGBA32_UINT:
             return PixelFormat::RGBA32UI;
         case Tegra::RenderTargetFormat::R8_UNORM:
@@ -522,6 +548,8 @@ struct SurfaceParams {
             return PixelFormat::BC6H_SF16;
         case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
             return PixelFormat::ASTC_2D_4X4;
+        case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
+            return PixelFormat::ASTC_2D_8X8;
         case Tegra::Texture::TextureFormat::R16_G16:
             switch (component_type) {
             case Tegra::Texture::ComponentType::FLOAT:
@@ -576,6 +604,7 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RG16_UNORM:
         case Tegra::RenderTargetFormat::R16_UNORM:
         case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+        case Tegra::RenderTargetFormat::BGR5A1_UNORM:
         case Tegra::RenderTargetFormat::RG8_UNORM:
         case Tegra::RenderTargetFormat::RGBA16_UNORM:
             return ComponentType::UNorm;
@@ -636,16 +665,18 @@ struct SurfaceParams {
     }
 
     static SurfaceType GetFormatType(PixelFormat pixel_format) {
-        if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) {
+        if (static_cast<std::size_t>(pixel_format) <
+            static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
             return SurfaceType::ColorTexture;
         }
 
-        if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) {
+        if (static_cast<std::size_t>(pixel_format) <
+            static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
             return SurfaceType::Depth;
         }
 
-        if (static_cast<size_t>(pixel_format) <
-            static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
+        if (static_cast<std::size_t>(pixel_format) <
+            static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
             return SurfaceType::DepthStencil;
         }
 
@@ -659,7 +690,7 @@ struct SurfaceParams {
     MathUtil::Rectangle<u32> GetRect() const;
 
     /// Returns the size of this surface in bytes, adjusted for compression
-    size_t SizeInBytes() const {
+    std::size_t SizeInBytes() const {
         const u32 compression_factor{GetCompressionFactor(pixel_format)};
         ASSERT(width % compression_factor == 0);
         ASSERT(height % compression_factor == 0);
@@ -671,7 +702,7 @@ struct SurfaceParams {
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
 
     /// Creates SurfaceParams from a framebuffer configuration
-    static SurfaceParams CreateForFramebuffer(size_t index);
+    static SurfaceParams CreateForFramebuffer(std::size_t index);
 
     /// Creates SurfaceParams for a depth buffer configuration
     static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -694,7 +725,7 @@ struct SurfaceParams {
     u32 height;
     u32 depth;
     u32 unaligned_height;
-    size_t size_in_bytes;
+    std::size_t size_in_bytes;
     SurfaceTarget target;
 };
 
@@ -711,7 +742,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
 namespace std {
 template <>
 struct hash<SurfaceReserveKey> {
-    size_t operator()(const SurfaceReserveKey& k) const {
+    std::size_t operator()(const SurfaceReserveKey& k) const {
         return k.Hash();
     }
 };
@@ -727,7 +758,7 @@ public:
         return params.addr;
     }
 
-    size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const {
         return params.size_in_bytes;
     }
 
@@ -775,7 +806,7 @@ public:
     Surface GetDepthBufferSurface(bool preserve_contents);
 
     /// Get the color surface based on the framebuffer configuration and the specified render target
-    Surface GetColorBufferSurface(size_t index, bool preserve_contents);
+    Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
 
     /// Flushes the surface to Switch memory
     void FlushSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 61080f5cc..7cd8f91e4 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -8,13 +8,14 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/utils.h"
 
 namespace OpenGL {
 
 /// Gets the address for the specified shader stage program
 static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+    const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
     return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
                                                shader_config.offset);
 }
@@ -28,7 +29,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) {
 
 /// Helper function to set shader uniform block bindings for a single shader stage
 static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
-                                         Maxwell::ShaderStage binding, size_t expected_size) {
+                                         Maxwell::ShaderStage binding, std::size_t expected_size) {
     const GLuint ub_index = glGetUniformBlockIndex(shader, name);
     if (ub_index == GL_INVALID_INDEX) {
         return;
@@ -36,7 +37,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
 
     GLint ub_size = 0;
     glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
-    ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size,
+    ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
                "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
     glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
 }
@@ -83,6 +84,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
     shader.Create(program_result.first.c_str(), gl_type);
     program.Create(true, shader.handle);
     SetShaderUniformBlockBindings(program.handle);
+    VideoCore::LabelGLObject(GL_PROGRAM, program.handle, addr);
 }
 
 GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6e6febcbc..9bafe43a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -28,7 +28,7 @@ public:
     }
 
     /// Gets the size of the shader in guest memory, required for cache management
-    size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const {
         return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2d56370c7..b3e95187e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -12,6 +12,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
 using Tegra::Shader::SubOp;
 
 constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
-constexpr u32 PROGRAM_HEADER_SIZE = 0x50;
+constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
 
 class DecompileFail : public std::runtime_error {
 public:
@@ -189,7 +190,7 @@ public:
 
 private:
     void AppendIndentation() {
-        shader_source.append(static_cast<size_t>(scope) * 4, ' ');
+        shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
     }
 
     std::string shader_source;
@@ -208,7 +209,7 @@ public:
         UnsignedInteger,
     };
 
-    GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
+    GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
 
     /// Gets the GLSL type string for a register
     static std::string GetTypeString() {
@@ -226,15 +227,23 @@ public:
     }
 
     /// Returns the index of the register
-    size_t GetIndex() const {
+    std::size_t GetIndex() const {
         return index;
     }
 
 private:
-    const size_t index;
+    const std::size_t index;
     const std::string& suffix;
 };
 
+enum class InternalFlag : u64 { // Flags the decompiler declares as GLSL bools, one per value.
+    ZeroFlag = 0, // Set to an "== 0" test of the result by SetRegisterToInteger when sets_cc.
+    CarryFlag = 1, // NOTE(review): no writer visible in this change.
+    OverflowFlag = 2, // NOTE(review): no writer visible in this change.
+    NaNFlag = 3, // NOTE(review): no writer visible in this change.
+    Amount // Number of flags; bounds the loop that emits the declarations.
+};
+
 /**
  * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
  * of all registers (e.g. whether they are currently being used as Floats or Integers), and
@@ -328,13 +337,19 @@ public:
     void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
                               const std::string& value, u64 dest_num_components,
                               u64 value_num_components, bool is_saturated = false,
-                              u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
+                              u64 dest_elem = 0, Register::Size size = Register::Size::Word,
+                              bool sets_cc = false) {
         ASSERT_MSG(!is_saturated, "Unimplemented");
 
         const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
 
         SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
                     dest_num_components, value_num_components, dest_elem);
+
+        if (sets_cc) {
+            const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
+            SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
+        }
     }
 
     /**
@@ -351,6 +366,26 @@ public:
         shader.AddLine(dest + " = " + src + ';');
     }
 
+    std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
+        switch (cc) { // Returns the GLSL boolean expression for a control code.
+        case Tegra::Shader::ControlCode::NEU: // True while the zero flag is clear.
+            return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
+        default: // Untranslated code: log it and fall back to a constant "false".
+            LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc));
+            UNREACHABLE();
+            return "false";
+        }
+    }
+
+    std::string GetInternalFlag(const InternalFlag ii) const { // GLSL variable name of a flag.
+        const u32 code = static_cast<u32>(ii); // Numeric flag id used in the identifier.
+        return "internalFlag_" + std::to_string(code) + suffix; // e.g. internalFlag_0<suffix>
+    }
+
+    void SetInternalFlag(const InternalFlag ii, const std::string& value) const {
+        shader.AddLine(GetInternalFlag(ii) + " = " + value + ';'); // Adds "<flag> = <value>;".
+    }
+
     /**
      * Writes code that does a output attribute assignment to register operation. Output attributes
      * are stored as floats, so this may require conversion.
@@ -414,6 +449,12 @@ public:
         }
         declarations.AddNewLine();
 
+        for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
+            const InternalFlag code = static_cast<InternalFlag>(ii);
+            declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
+        }
+        declarations.AddNewLine();
+
         for (const auto element : declr_input_attribute) {
             // TODO(bunnei): Use proper number of elements for these
             u32 idx =
@@ -468,7 +509,7 @@ public:
     /// necessary.
     std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
                               bool is_array) {
-        const size_t offset = static_cast<size_t>(sampler.index.Value());
+        const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());
 
         // If this sampler has already been used, return the existing mapping.
         const auto itr =
@@ -481,7 +522,7 @@ public:
         }
 
         // Otherwise create a new mapping for this sampler
-        const size_t next_index = used_samplers.size();
+        const std::size_t next_index = used_samplers.size();
         const SamplerEntry entry{stage, offset, next_index, type, is_array};
         used_samplers.emplace_back(entry);
         return entry.GetName();
@@ -531,7 +572,7 @@ private:
     void BuildRegisterList() {
         regs.reserve(Register::NumRegisters);
 
-        for (size_t index = 0; index < Register::NumRegisters; ++index) {
+        for (std::size_t index = 0; index < Register::NumRegisters; ++index) {
             regs.emplace_back(index, suffix);
         }
     }
@@ -674,7 +715,7 @@ public:
                   u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
         : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
           stage(stage), suffix(suffix) {
-
+        std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
         Generate(suffix);
     }
 
@@ -688,23 +729,6 @@ public:
     }
 
 private:
-    // Shader program header for a Fragment Shader.
-    struct FragmentHeader {
-        INSERT_PADDING_WORDS(5);
-        INSERT_PADDING_WORDS(13);
-        u32 enabled_color_outputs;
-        union {
-            BitField<0, 1, u32> writes_samplemask;
-            BitField<1, 1, u32> writes_depth;
-        };
-
-        bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
-            const u32 bit = render_target * 4 + component;
-            return enabled_color_outputs & (1 << bit);
-        }
-    };
-    static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
-
     /// Gets the Subroutine object corresponding to the specified address.
     const Subroutine& GetSubroutine(u32 begin, u32 end) const {
         const auto iter = subroutines.find(Subroutine{begin, end, suffix});
@@ -862,7 +886,7 @@ private:
      */
     bool IsSchedInstruction(u32 offset) const {
         // sched instructions appear once every 4 instructions.
-        static constexpr size_t SchedPeriod = 4;
+        static constexpr std::size_t SchedPeriod = 4;
         u32 absolute_offset = offset - main_offset;
 
         return (absolute_offset % SchedPeriod) == 0;
@@ -930,7 +954,7 @@ private:
         std::string result;
         result += '(';
 
-        for (size_t i = 0; i < shift_amounts.size(); ++i) {
+        for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
             if (i)
                 result += '|';
             result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
@@ -954,9 +978,7 @@ private:
         // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
         // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
 
-        ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented");
-
-        size_t written_components = 0;
+        std::size_t written_components = 0;
         for (u32 component = 0; component < 4; ++component) {
             if (!instr.texs.IsComponentEnabled(component)) {
                 continue;
@@ -1010,10 +1032,8 @@ private:
     /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
     void EmitFragmentOutputsWrite() {
         ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
-        FragmentHeader header;
-        std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
 
-        ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented");
+        ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
 
         // Write the color outputs using the data in the shader registers, disabled
         // rendertargets/components are skipped in the register assignment.
@@ -1022,7 +1042,7 @@ private:
              ++render_target) {
             // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
             for (u32 component = 0; component < 4; ++component) {
-                if (header.IsColorComponentOutputEnabled(render_target, component)) {
+                if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
                     shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
                                                regs.GetRegisterAsFloat(current_reg)));
                     ++current_reg;
@@ -1030,7 +1050,7 @@ private:
             }
         }
 
-        if (header.writes_depth) {
+        if (header.ps.omap.depth) {
             // The depth output is always 2 registers after the last color output, and current_reg
             // already contains one past the last color register.
 
@@ -1510,8 +1530,6 @@ private:
             case OpCode::Id::LEA_IMM:
             case OpCode::Id::LEA_RZ:
             case OpCode::Id::LEA_HI: {
-                std::string op_a;
-                std::string op_b;
                 std::string op_c;
 
                 switch (opcode->GetId()) {
@@ -1642,7 +1660,8 @@ private:
                 }
 
                 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
-                                          1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
+                                          1, instr.alu.saturate_d, 0, instr.conversion.dest_size,
+                                          instr.generates_cc.Value() != 0);
                 break;
             }
             case OpCode::Id::I2F_R:
@@ -1772,13 +1791,34 @@ private:
         case OpCode::Type::Memory: {
             switch (opcode->GetId()) {
             case OpCode::Id::LD_A: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                 // Note: Shouldn't this be interp mode flat? As in no interpolation made.
+                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+                           "Indirect attribute loads are not supported");
+                ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+                           "Unaligned attribute loads are not supported");
 
                 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
                                                   Tegra::Shader::IpaSampleMode::Default};
-                regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
-                                                instr.attribute.fmt20.index, input_mode);
+
+                u64 next_element = instr.attribute.fmt20.element;
+                u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+                const auto LoadNextElement = [&](u32 reg_offset) {
+                    regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
+                                                    static_cast<Attribute::Index>(next_index),
+                                                    input_mode);
+
+                    // Load the next attribute element into the following register. If the element
+                    // to load goes beyond the vec4 size, load the first element of the next
+                    // attribute.
+                    next_element = (next_element + 1) % 4;
+                    next_index = next_index + (next_element == 0 ? 1 : 0);
+                };
+
+                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+                    LoadNextElement(reg_offset);
+                }
                 break;
             }
             case OpCode::Id::LD_C: {
@@ -1820,9 +1860,31 @@ private:
                 break;
             }
             case OpCode::Id::ST_A: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
-                regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
-                                                  instr.attribute.fmt20.element, instr.gpr0);
+                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+                           "Indirect attribute stores are not supported");
+                ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+                           "Unaligned attribute stores are not supported");
+
+                u64 next_element = instr.attribute.fmt20.element;
+                u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+                const auto StoreNextElement = [&](u32 reg_offset) {
+                    regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
+                                                      next_element,
+                                                      instr.gpr0.Value() + reg_offset);
+
+                    // Store the following register into the next attribute element. If the
+                    // element to store goes beyond the vec4 size, continue with the first
+                    // element of the next attribute.
+                    next_element = (next_element + 1) % 4;
+                    next_index = next_index + (next_element == 0 ? 1 : 0);
+                };
+
+                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+                    StoreNextElement(reg_offset);
+                }
+
                 break;
             }
             case OpCode::Id::TEX: {
@@ -1830,6 +1892,13 @@ private:
                 Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
                 std::string coord;
 
+                ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+                           "NODEP is not implemented");
+                ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+                           "AOFFI is not implemented");
+                ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+                           "DC is not implemented");
+
                 switch (texture_type) {
                 case Tegra::Shader::TextureType::Texture1D: {
                     const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
@@ -1894,8 +1963,8 @@ private:
                     UNREACHABLE();
                 }
                 }
-                size_t dest_elem{};
-                for (size_t elem = 0; elem < 4; ++elem) {
+                std::size_t dest_elem{};
+                for (std::size_t elem = 0; elem < 4; ++elem) {
                     if (!instr.tex.IsComponentEnabled(elem)) {
                         // Skip disabled components
                         continue;
@@ -1912,6 +1981,11 @@ private:
                 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
                 bool is_array{instr.texs.IsArrayTexture()};
 
+                ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+                           "NODEP is not implemented");
+                ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+                           "DC is not implemented");
+
                 switch (texture_type) {
                 case Tegra::Shader::TextureType::Texture2D: {
                     if (is_array) {
@@ -1948,6 +2022,13 @@ private:
                 ASSERT(instr.tlds.IsArrayTexture() == false);
                 std::string coord;
 
+                ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+                           "NODEP is not implemented");
+                ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+                           "AOFFI is not implemented");
+                ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
+                           "MZ is not implemented");
+
                 switch (instr.tlds.GetTextureType()) {
                 case Tegra::Shader::TextureType::Texture2D: {
                     if (instr.tlds.IsArrayTexture()) {
@@ -1976,6 +2057,17 @@ private:
                 ASSERT(instr.tld4.array == 0);
                 std::string coord;
 
+                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+                           "NODEP is not implemented");
+                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+                           "AOFFI is not implemented");
+                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+                           "DC is not implemented");
+                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+                           "NDV is not implemented");
+                ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
+                           "PTP is not implemented");
+
                 switch (instr.tld4.texture_type) {
                 case Tegra::Shader::TextureType::Texture2D: {
                     const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
@@ -1999,8 +2091,8 @@ private:
                 const std::string texture = "textureGather(" + sampler + ", coords, " +
                                             std::to_string(instr.tld4.component) + ')';
 
-                size_t dest_elem{};
-                for (size_t elem = 0; elem < 4; ++elem) {
+                std::size_t dest_elem{};
+                for (std::size_t elem = 0; elem < 4; ++elem) {
                     if (!instr.tex.IsComponentEnabled(elem)) {
                         // Skip disabled components
                         continue;
@@ -2013,6 +2105,13 @@ private:
                 break;
             }
             case OpCode::Id::TLD4S: {
+                ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+                           "NODEP is not implemented");
+                ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+                           "AOFFI is not implemented");
+                ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+                           "DC is not implemented");
+
                 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
                 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
                 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
@@ -2025,6 +2124,9 @@ private:
                 break;
             }
             case OpCode::Id::TXQ: {
+                ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+                           "NODEP is not implemented");
+
                 // TODO: the new commits on the texture refactor, change the way samplers work.
                 // Sadly, not all texture instructions specify the type of texture their sampler
                 // uses. This must be fixed at a later instance.
@@ -2045,6 +2147,11 @@ private:
                 break;
             }
             case OpCode::Id::TMML: {
+                ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+                           "NODEP is not implemented");
+                ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+                           "NDV is not implemented");
+
                 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
                 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
                 const bool is_array = instr.tmml.array != 0;
@@ -2211,31 +2318,55 @@ private:
             break;
         }
         case OpCode::Type::PredicateSetPredicate: {
-            const std::string op_a =
-                GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
-            const std::string op_b =
-                GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+            switch (opcode->GetId()) {
+            case OpCode::Id::PSETP: {
+                const std::string op_a =
+                    GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+                const std::string op_b =
+                    GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
 
-            // We can't use the constant predicate as destination.
-            ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+                // We can't use the constant predicate as destination.
+                ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
 
-            const std::string second_pred =
-                GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+                const std::string second_pred =
+                    GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
 
-            const std::string combiner = GetPredicateCombiner(instr.psetp.op);
+                const std::string combiner = GetPredicateCombiner(instr.psetp.op);
 
-            const std::string predicate =
-                '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
+                const std::string predicate =
+                    '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
 
-            // Set the primary predicate to the result of Predicate OP SecondPredicate
-            SetPredicate(instr.psetp.pred3,
-                         '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+                // Set the primary predicate to the result of Predicate OP SecondPredicate
+                SetPredicate(instr.psetp.pred3,
+                             '(' + predicate + ") " + combiner + " (" + second_pred + ')');
 
-            if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
-                // if enabled
-                SetPredicate(instr.psetp.pred0,
-                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+                if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+                    // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+                    // if enabled
+                    SetPredicate(instr.psetp.pred0,
+                                 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+                }
+                break;
+            }
+            case OpCode::Id::CSETP: { // pred3 = cc OP pred39, pred0 = !cc OP pred39
+                const std::string pred =
+                    GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
+                const std::string combiner = GetPredicateCombiner(instr.csetp.op);
+                const std::string control_code = regs.GetControlCode(instr.csetp.cc);
+                if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
+                    SetPredicate(instr.csetp.pred3,
+                                 '(' + control_code + ") " + combiner + " (" + pred + ')');
+                }
+                if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+                    SetPredicate(instr.csetp.pred0,
+                                 "!(" + control_code + ") " + combiner + " (" + pred + ')');
+                }
+                break;
+            }
+            default: {
+                LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName());
+                UNREACHABLE();
+            }
             }
             break;
         }
@@ -2625,6 +2756,7 @@ private:
 private:
     const std::set<Subroutine>& subroutines;
     const ProgramCode& program_code;
+    Tegra::Shader::Header header;
     const u32 main_offset;
     Maxwell3D::Regs::ShaderStage stage;
     const std::string& suffix;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index a43e2997b..d53b93ad5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -13,7 +13,7 @@
 
 namespace OpenGL::GLShader {
 
-constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
+constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
 using ProgramCode = std::vector<u64>;
 
 class ConstBufferEntry {
@@ -51,7 +51,7 @@ public:
     }
 
     std::string GetName() const {
-        return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index);
+        return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
     }
 
     u32 GetHash() const {
@@ -74,15 +74,15 @@ class SamplerEntry {
     using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 public:
-    SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index,
+    SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
                  Tegra::Shader::TextureType type, bool is_array)
         : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {}
 
-    size_t GetOffset() const {
+    std::size_t GetOffset() const {
         return offset;
     }
 
-    size_t GetIndex() const {
+    std::size_t GetIndex() const {
         return sampler_index;
     }
 
@@ -91,7 +91,7 @@ public:
     }
 
     std::string GetName() const {
-        return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' +
+        return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
                std::to_string(sampler_index);
     }
 
@@ -133,7 +133,7 @@ public:
     }
 
     static std::string GetArrayName(Maxwell::ShaderStage stage) {
-        return TextureSamplerNames[static_cast<size_t>(stage)];
+        return TextureSamplerNames[static_cast<std::size_t>(stage)];
     }
 
 private:
@@ -143,9 +143,9 @@ private:
 
     /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
     /// instruction.
-    size_t offset;
+    std::size_t offset;
     Maxwell::ShaderStage stage;      ///< Shader stage where this sampler was used.
-    size_t sampler_index;            ///< Value used to index into the generated GLSL sampler array.
+    std::size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array.
     Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array; ///< Whether the texture is being sampled as an array texture or not.
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 533e42caa..3de15ba9b 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -11,9 +11,6 @@
 
 namespace OpenGL::GLShader {
 
-/// Number of OpenGL texture samplers that can be used in the fragment shader
-static constexpr size_t NumTextureSamplers = 32;
-
 using Tegra::Engines::Maxwell3D;
 
 /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 6f70deb96..1fe26a2a9 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -4,6 +4,7 @@
 
 #include <iterator>
 #include <glad/glad.h>
+#include "common/assert.h"
 #include "common/logging/log.h"
 #include "video_core/renderer_opengl/gl_state.h"
 
@@ -78,6 +79,8 @@ OpenGLState::OpenGLState() {
     viewport.height = 0;
 
     clip_distance = {};
+
+    point.size = 1;
 }
 
 void OpenGLState::Apply() const {
@@ -204,9 +207,6 @@ void OpenGLState::Apply() const {
             glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
             glBindTexture(texture_unit.target, texture_unit.texture);
         }
-        if (texture_unit.sampler != cur_state_texture_unit.sampler) {
-            glBindSampler(static_cast<GLuint>(i), texture_unit.sampler);
-        }
         // Update the texture swizzle
         if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r ||
             texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
@@ -218,6 +218,27 @@ void OpenGLState::Apply() const {
         }
     }
 
+    // Samplers: rebind only the contiguous range of units [first, last] that contains changes
+    {
+        bool has_delta{};
+        std::size_t first{}, last{};
+        std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
+        for (std::size_t i = 0; i < std::size(samplers); ++i) {
+            samplers[i] = texture_units[i].sampler;
+            if (samplers[i] != cur_state.texture_units[i].sampler) {
+                if (!has_delta) {
+                    first = i;
+                    has_delta = true;
+                }
+                last = i;
+            }
+        }
+        if (has_delta) {
+            glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
+                           samplers.data() + first); // GL binds element k to unit first + k
+        }
+    }
+
     // Framebuffer
     if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
         glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@@ -272,7 +293,7 @@ void OpenGLState::Apply() const {
     }
 
     // Clip distance
-    for (size_t i = 0; i < clip_distance.size(); ++i) {
+    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
         if (clip_distance[i] != cur_state.clip_distance[i]) {
             if (clip_distance[i]) {
                 glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
@@ -282,6 +303,11 @@ void OpenGLState::Apply() const {
         }
     }
 
+    // Point
+    if (point.size != cur_state.point.size) {
+        glPointSize(point.size);
+    }
+
     cur_state = *this;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index e3e24b9e7..dc21a2ee3 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -6,13 +6,10 @@
 
 #include <array>
 #include <glad/glad.h>
-
 #include "video_core/engines/maxwell_3d.h"
 
 namespace OpenGL {
 
-using Regs = Tegra::Engines::Maxwell3D::Regs;
-
 namespace TextureUnits {
 
 struct TextureUnit {
@@ -118,7 +115,7 @@ public:
             target = GL_TEXTURE_2D;
         }
     };
-    std::array<TextureUnit, 32> texture_units;
+    std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
 
     struct {
         GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
@@ -145,6 +142,10 @@ public:
         GLsizei height;
     } viewport;
 
+    struct {
+        float size; // GL_POINT_SIZE
+    } point;
+
     std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE
 
     OpenGLState();
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index aadf68f16..e409228cc 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -61,7 +61,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
     mapped_size = size;
 
     if (alignment > 0) {
-        buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
+        buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
     }
 
     bool invalidate = false;
@@ -74,7 +74,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
         }
     }
 
-    if (invalidate | !persistent) {
+    if (invalidate || !persistent) {
         GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
                            (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
                            (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 272294c62..3d5476e5d 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -13,39 +13,101 @@
 namespace Tegra::Texture {
 
 /**
+ * This table represents the internal swizzle of a GOB,
+ * in 16 bytes x 2 sector packing format.
  * Calculates the offset of an (x, y) position within a swizzled texture.
- * Taken from the Tegra X1 TRM.
+ * Taken from the Tegra X1 Technical Reference Manual, pages 1187-1188.
  */
-static u32 GetSwizzleOffset(u32 x, u32 y, u32 image_width, u32 bytes_per_pixel, u32 block_height) {
-    // Round up to the next gob
-    const u32 image_width_in_gobs{(image_width * bytes_per_pixel + 63) / 64};
+template <std::size_t N, std::size_t M, u32 Align>
+struct alignas(64) SwizzleTable { // N rows of M entries, entries are Align bytes apart
+    static_assert(M * Align == 64, "Swizzle Table does not align to GOB");
+    constexpr SwizzleTable() {
+        for (u32 y = 0; y < N; ++y) {
+            for (u32 x = 0; x < M; ++x) {
+                const u32 x2 = x * Align; // byte column covered by entry x
+                values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
+                                                ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
+            }
+        }
+    }
+    const std::array<u16, M>& operator[](std::size_t index) const { // index selects the row (y)
+        return values[index];
+    }
+    std::array<std::array<u16, M>, N> values{}; // values[y][x]: byte offset within one GOB
+};
 
-    u32 GOB_address = 0 + (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs +
-                      (x * bytes_per_pixel / 64) * 512 * block_height +
-                      (y % (8 * block_height) / 8) * 512;
-    x *= bytes_per_pixel;
-    u32 address = GOB_address + ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
-                  (y % 2) * 16 + (x % 16);
+constexpr auto legacy_swizzle_table = SwizzleTable<8, 64, 1>();
+constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>();
 
-    return address;
-}
-
-void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
-                      u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) {
-    u8* data_ptrs[2];
-    for (unsigned y = 0; y < height; ++y) {
-        for (unsigned x = 0; x < width; ++x) {
-            u32 swizzle_offset = GetSwizzleOffset(x, y, width, bytes_per_pixel, block_height);
-            u32 pixel_index = (x + y * width) * out_bytes_per_pixel;
+static void LegacySwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
+                              u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
+                              u32 block_height) {
+    std::array<u8*, 2> data_ptrs;
+    const std::size_t stride = width * bytes_per_pixel;
+    const std::size_t gobs_in_x = 64;
+    const std::size_t gobs_in_y = 8;
+    const std::size_t gobs_size = gobs_in_x * gobs_in_y;
+    const std::size_t image_width_in_gobs{(stride + gobs_in_x - 1) / gobs_in_x};
+    for (std::size_t y = 0; y < height; ++y) {
+        const std::size_t gob_y_address =
+            (y / (gobs_in_y * block_height)) * gobs_size * block_height * image_width_in_gobs +
+            (y % (gobs_in_y * block_height) / gobs_in_y) * gobs_size;
+        const auto& table = legacy_swizzle_table[y % gobs_in_y];
+        for (std::size_t x = 0; x < width; ++x) {
+            const std::size_t gob_address =
+                gob_y_address + (x * bytes_per_pixel / gobs_in_x) * gobs_size * block_height;
+            const std::size_t x2 = x * bytes_per_pixel;
+            const std::size_t swizzle_offset = gob_address + table[x2 % gobs_in_x];
+            const std::size_t pixel_index = (x + y * width) * out_bytes_per_pixel;
 
             data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
-            data_ptrs[!unswizzle] = &unswizzled_data[pixel_index];
+            data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
 
             std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
         }
     }
 }
 
+static void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
+                            u8* swizzled_data, u8* unswizzled_data, bool unswizzle,
+                            u32 block_height) {
+    std::array<u8*, 2> data_ptrs; // [0] = memcpy destination, [1] = memcpy source
+    const std::size_t stride{width * bytes_per_pixel}; // row length in bytes
+    const std::size_t gobs_in_x = 64; // GOB width in bytes
+    const std::size_t gobs_in_y = 8; // GOB height in rows
+    const std::size_t gobs_size = gobs_in_x * gobs_in_y; // bytes per GOB
+    const std::size_t image_width_in_gobs{(stride + gobs_in_x - 1) / gobs_in_x}; // rounded up
+    const std::size_t copy_size{16}; // bytes moved per memcpy
+    for (std::size_t y = 0; y < height; ++y) {
+        const std::size_t initial_gob =
+            (y / (gobs_in_y * block_height)) * gobs_size * block_height * image_width_in_gobs +
+            (y % (gobs_in_y * block_height) / gobs_in_y) * gobs_size;
+        const std::size_t pixel_base{y * width * out_bytes_per_pixel}; // start of linear row y
+        const auto& table = fast_swizzle_table[y % gobs_in_y]; // offsets for this row of the GOB
+        for (std::size_t xb = 0; xb < stride; xb += copy_size) {
+            const std::size_t gob_address{initial_gob +
+                                          (xb / gobs_in_x) * gobs_size * block_height};
+            const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]};
+            const std::size_t out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
+            const std::size_t pixel_index{out_x + pixel_base};
+            data_ptrs[unswizzle] = swizzled_data + swizzle_offset; // source when unswizzling
+            data_ptrs[!unswizzle] = unswizzled_data + pixel_index; // source when swizzling
+            std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); // TODO: verify when out bpp != bpp
+        }
+    }
+}
+
+void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
+                      u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) {
+    if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
+        FastSwizzleData(width, height, bytes_per_pixel, out_bytes_per_pixel, swizzled_data,
+                        unswizzled_data, unswizzle, block_height); // copies 16 bytes at a time
+    } else {
+        LegacySwizzleData(width, height, bytes_per_pixel, out_bytes_per_pixel, swizzled_data,
+                          unswizzled_data, unswizzle, block_height); // copies pixel by pixel
+    }
+}
+
 u32 BytesPerPixel(TextureFormat format) {
     switch (format) {
     case TextureFormat::DXT1:
@@ -63,6 +125,7 @@ u32 BytesPerPixel(TextureFormat format) {
     case TextureFormat::R32_G32_B32:
         return 12;
     case TextureFormat::ASTC_2D_4X4:
+    case TextureFormat::ASTC_2D_8X8:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::BF10GF11RF11:
@@ -111,6 +174,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::BC6H_UF16:
     case TextureFormat::BC6H_SF16:
     case TextureFormat::ASTC_2D_4X4:
+    case TextureFormat::ASTC_2D_8X8:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::A1B5G5R5:
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index e0a14d48f..681919ae3 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -161,4 +161,26 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe
     }
 }
 
+static inline void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr,
+                                 const std::string& extra_info = "") {
+    if (!GLAD_GL_KHR_debug) {
+        return; // We don't need to throw an error as this is just for debugging
+    }
+    const std::string nice_addr = fmt::format("0x{:016x}", addr);
+    std::string object_label;
+
+    switch (identifier) {
+    case GL_TEXTURE:
+        object_label = extra_info + "@" + nice_addr;
+        break;
+    case GL_PROGRAM:
+        object_label = "ShaderProgram@" + nice_addr;
+        break;
+    default:
+        object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
+        break;
+    }
+    glObjectLabel(identifier, handle, -1, object_label.c_str()); // -1: null-terminated label
+}
+
 } // namespace VideoCore
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp
index 0be030434..8743ce982 100644
--- a/src/yuzu/configuration/configure_gamelist.cpp
+++ b/src/yuzu/configuration/configure_gamelist.cpp
@@ -89,7 +89,7 @@ void ConfigureGameList::InitializeIconSizeComboBox() {
 }
 
 void ConfigureGameList::InitializeRowComboBoxes() {
-    for (size_t i = 0; i < row_text_names.size(); ++i) {
+    for (std::size_t i = 0; i < row_text_names.size(); ++i) {
         ui->row_1_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
         ui->row_2_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
     }
diff --git a/src/yuzu/debugger/graphics/graphics_breakpoints.cpp b/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
index fe682b3b8..b5c88f944 100644
--- a/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
+++ b/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
@@ -42,7 +42,8 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const {
                  tr("Finished primitive batch")},
             };
 
-            DEBUG_ASSERT(map.size() == static_cast<size_t>(Tegra::DebugContext::Event::NumEvents));
+            DEBUG_ASSERT(map.size() ==
+                         static_cast<std::size_t>(Tegra::DebugContext::Event::NumEvents));
             return (map.find(event) != map.end()) ? map.at(event) : QString();
         }
 
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 7e37962d5..cbcd5dd5f 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -341,8 +341,8 @@ void GraphicsSurfaceWidget::OnUpdate() {
         // directly...
 
         const auto& registers = gpu.Maxwell3D().regs;
-        const auto& rt = registers.rt[static_cast<size_t>(surface_source) -
-                                      static_cast<size_t>(Source::RenderTarget0)];
+        const auto& rt = registers.rt[static_cast<std::size_t>(surface_source) -
+                                      static_cast<std::size_t>(Source::RenderTarget0)];
 
         surface_address = rt.Address();
         surface_width = rt.width;
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index dc1023113..a3b1fd357 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -15,6 +15,7 @@
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/timer.h"
 #include "core/hle/kernel/wait_object.h"
+#include "core/memory.h"
 
 WaitTreeItem::WaitTreeItem() = default;
 WaitTreeItem::~WaitTreeItem() = default;
@@ -117,7 +118,7 @@ QString WaitTreeCallstack::GetText() const {
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() const {
     std::vector<std::unique_ptr<WaitTreeItem>> list;
 
-    constexpr size_t BaseRegister = 29;
+    constexpr std::size_t BaseRegister = 29;
     u64 base_pointer = thread.context.cpu_registers[BaseRegister];
 
     while (base_pointer != 0) {
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 3b3b551bb..67890455a 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -26,10 +26,10 @@
 #include "yuzu/main.h"
 #include "yuzu/ui_settings.h"
 
-GameList::SearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {}
+GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {}
 
 // EventFilter in order to process systemkeys while editing the searchfield
-bool GameList::SearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* event) {
+bool GameListSearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* event) {
     // If it isn't a KeyRelease event then continue with standard event processing
     if (event->type() != QEvent::KeyRelease)
         return QObject::eventFilter(obj, event);
@@ -88,29 +88,21 @@ bool GameList::SearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* e
     return QObject::eventFilter(obj, event);
 }
 
-void GameList::SearchField::setFilterResult(int visible, int total) {
-    QString result_of_text = tr("of");
-    QString result_text;
-    if (total == 1) {
-        result_text = tr("result");
-    } else {
-        result_text = tr("results");
-    }
-    label_filter_result->setText(
-        QString("%1 %2 %3 %4").arg(visible).arg(result_of_text).arg(total).arg(result_text));
+void GameListSearchField::setFilterResult(int visible, int total) {
+    label_filter_result->setText(tr("%1 of %n result(s)", "", total).arg(visible));
 }
 
-void GameList::SearchField::clear() {
+void GameListSearchField::clear() {
     edit_filter->setText("");
 }
 
-void GameList::SearchField::setFocus() {
+void GameListSearchField::setFocus() {
     if (edit_filter->isVisible()) {
         edit_filter->setFocus();
     }
 }
 
-GameList::SearchField::SearchField(GameList* parent) : QWidget{parent} {
+GameListSearchField::GameListSearchField(GameList* parent) : QWidget{parent} {
     KeyReleaseEater* keyReleaseEater = new KeyReleaseEater(parent);
     layout_filter = new QHBoxLayout;
     layout_filter->setMargin(8);
@@ -210,7 +202,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, GMainWindow* parent)
     this->main_window = parent;
     layout = new QVBoxLayout;
     tree_view = new QTreeView;
-    search_field = new SearchField(this);
+    search_field = new GameListSearchField(this);
     item_model = new QStandardItemModel(tree_view);
     tree_view->setModel(item_model);
 
@@ -326,9 +318,14 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
     int row = item_model->itemFromIndex(item)->row();
     QStandardItem* child_file = item_model->invisibleRootItem()->child(row, COLUMN_NAME);
     u64 program_id = child_file->data(GameListItemPath::ProgramIdRole).toULongLong();
+    std::string path = child_file->data(GameListItemPath::FullPathRole).toString().toStdString();
 
     QMenu context_menu;
     QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
+    QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location"));
+    context_menu.addSeparator();
+    QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
+    QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
     QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry"));
 
     open_save_location->setEnabled(program_id != 0);
@@ -337,6 +334,10 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
 
     connect(open_save_location, &QAction::triggered,
             [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); });
+    connect(open_lfs_location, &QAction::triggered,
+            [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); });
+    connect(dump_romfs, &QAction::triggered, [&]() { emit DumpRomFSRequested(program_id, path); });
+    connect(copy_tid, &QAction::triggered, [&]() { emit CopyTIDRequested(program_id); });
     connect(navigate_to_gamedb_entry, &QAction::triggered,
             [&]() { emit NavigateToGamedbEntryRequested(program_id, compatibility_list); });
 
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index 2713e7b54..05e115e19 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -22,13 +22,17 @@
 #include "yuzu/compatibility_list.h"
 
 class GameListWorker;
+class GameListSearchField;
 class GMainWindow;
 
 namespace FileSys {
 class VfsFilesystem;
 }
 
-enum class GameListOpenTarget { SaveData };
+enum class GameListOpenTarget {
+    SaveData,
+    ModData,
+};
 
 class GameList : public QWidget {
     Q_OBJECT
@@ -43,33 +47,6 @@ public:
         COLUMN_COUNT, // Number of columns
     };
 
-    class SearchField : public QWidget {
-    public:
-        void setFilterResult(int visible, int total);
-        void clear();
-        void setFocus();
-        explicit SearchField(GameList* parent = nullptr);
-
-    private:
-        class KeyReleaseEater : public QObject {
-        public:
-            explicit KeyReleaseEater(GameList* gamelist);
-
-        private:
-            GameList* gamelist = nullptr;
-            QString edit_filter_text_old;
-
-        protected:
-            bool eventFilter(QObject* obj, QEvent* event) override;
-        };
-        QHBoxLayout* layout_filter = nullptr;
-        QTreeView* tree_view = nullptr;
-        QLabel* label_filter = nullptr;
-        QLineEdit* edit_filter = nullptr;
-        QLabel* label_filter_result = nullptr;
-        QToolButton* button_filter_close = nullptr;
-    };
-
     explicit GameList(std::shared_ptr<FileSys::VfsFilesystem> vfs, GMainWindow* parent = nullptr);
     ~GameList() override;
 
@@ -89,6 +66,8 @@ signals:
     void GameChosen(QString game_path);
     void ShouldCancelWorker();
     void OpenFolderRequested(u64 program_id, GameListOpenTarget target);
+    void DumpRomFSRequested(u64 program_id, const std::string& game_path);
+    void CopyTIDRequested(u64 program_id);
     void NavigateToGamedbEntryRequested(u64 program_id,
                                         const CompatibilityList& compatibility_list);
 
@@ -105,7 +84,7 @@ private:
     void RefreshGameDirectory();
 
     std::shared_ptr<FileSys::VfsFilesystem> vfs;
-    SearchField* search_field;
+    GameListSearchField* search_field;
     GMainWindow* main_window = nullptr;
     QVBoxLayout* layout = nullptr;
     QTreeView* tree_view = nullptr;
@@ -113,6 +92,8 @@ private:
     GameListWorker* current_worker = nullptr;
     QFileSystemWatcher* watcher = nullptr;
     CompatibilityList compatibility_list;
+
+    friend class GameListSearchField;
 };
 
 Q_DECLARE_METATYPE(GameListOpenTarget);
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index f22e422e5..3db0e90da 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -16,6 +16,7 @@
 #include <QObject>
 #include <QStandardItem>
 #include <QString>
+#include <QWidget>
 
 #include "common/common_types.h"
 #include "common/logging/log.h"
@@ -68,7 +69,7 @@ public:
         if (!picture.loadFromData(picture_data.data(), static_cast<u32>(picture_data.size()))) {
             picture = GetDefaultIcon(size);
         }
-        picture = picture.scaled(size, size);
+        picture = picture.scaled(size, size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
 
         setData(picture, Qt::DecorationRole);
     }
@@ -106,7 +107,7 @@ class GameListItemCompat : public GameListItem {
 public:
     static const int CompatNumberRole = Qt::UserRole + 1;
     GameListItemCompat() = default;
-    explicit GameListItemCompat(const QString& compatiblity) {
+    explicit GameListItemCompat(const QString& compatibility) {
         struct CompatStatus {
             QString color;
             const char* text;
@@ -123,13 +124,13 @@ public:
         {"99", {"#000000", QT_TR_NOOP("Not Tested"), QT_TR_NOOP("The game has not yet been tested.")}}};
         // clang-format on
 
-        auto iterator = status_data.find(compatiblity);
+        auto iterator = status_data.find(compatibility);
         if (iterator == status_data.end()) {
-            LOG_WARNING(Frontend, "Invalid compatibility number {}", compatiblity.toStdString());
+            LOG_WARNING(Frontend, "Invalid compatibility number {}", compatibility.toStdString());
             return;
         }
-        CompatStatus status = iterator->second;
-        setData(compatiblity, CompatNumberRole);
+        const CompatStatus& status = iterator->second;
+        setData(compatibility, CompatNumberRole);
         setText(QObject::tr(status.text));
         setToolTip(QObject::tr(status.tooltip));
         setData(CreateCirclePixmapFromColor(status.color), Qt::DecorationRole);
@@ -176,3 +177,42 @@ public:
         return data(SizeRole).toULongLong() < other.data(SizeRole).toULongLong();
     }
 };
+
+class GameList;
+class QHBoxLayout;
+class QTreeView;
+class QLabel;
+class QLineEdit;
+class QToolButton;
+
+class GameListSearchField : public QWidget {
+    Q_OBJECT
+
+public:
+    explicit GameListSearchField(GameList* parent = nullptr);
+
+    void setFilterResult(int visible, int total);
+
+    void clear();
+    void setFocus();
+
+private:
+    class KeyReleaseEater : public QObject {
+    public:
+        explicit KeyReleaseEater(GameList* gamelist);
+
+    private:
+        GameList* gamelist = nullptr;
+        QString edit_filter_text_old;
+
+    protected:
+        // EventFilter in order to process systemkeys while editing the searchfield
+        bool eventFilter(QObject* obj, QEvent* event) override;
+    };
+    QHBoxLayout* layout_filter = nullptr;
+    QTreeView* tree_view = nullptr;
+    QLabel* label_filter = nullptr;
+    QLineEdit* edit_filter = nullptr;
+    QLabel* label_filter_result = nullptr;
+    QToolButton* button_filter_close = nullptr;
+};
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 22a317737..681758ad2 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -7,6 +7,22 @@
 #include <memory>
 #include <thread>
 
+// VFS includes must be before glad as they will conflict with Windows file api, which uses defines.
+#include "core/file_sys/vfs.h"
+#include "core/file_sys/vfs_real.h"
+
+// These are wrappers to avoid the calls to CreateDirectory and CreateFile because of the Windows
+// defines.
+static FileSys::VirtualDir VfsFilesystemCreateDirectoryWrapper(
+    const FileSys::VirtualFilesystem& vfs, const std::string& path, FileSys::Mode mode) {
+    return vfs->CreateDirectory(path, mode);
+}
+
+static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::VirtualDir& dir,
+                                                          const std::string& path) {
+    return dir->CreateFile(path);
+}
+
 #include <fmt/ostream.h>
 #include <glad/glad.h>
 
@@ -30,16 +46,18 @@
 #include "common/telemetry.h"
 #include "core/core.h"
 #include "core/crypto/key_manager.h"
+#include "core/file_sys/bis_factory.h"
 #include "core/file_sys/card_image.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
 #include "core/file_sys/patch_manager.h"
 #include "core/file_sys/registered_cache.h"
+#include "core/file_sys/romfs.h"
 #include "core/file_sys/savedata_factory.h"
 #include "core/file_sys/submission_package.h"
-#include "core/file_sys/vfs_real.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/service/filesystem/filesystem.h"
+#include "core/hle/service/filesystem/fsp_ldr.h"
 #include "core/loader/loader.h"
 #include "core/perf_stats.h"
 #include "core/settings.h"
@@ -362,6 +380,8 @@ void GMainWindow::RestoreUIState() {
 void GMainWindow::ConnectWidgetEvents() {
     connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile);
     connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder);
+    connect(game_list, &GameList::DumpRomFSRequested, this, &GMainWindow::OnGameListDumpRomFS);
+    connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID);
     connect(game_list, &GameList::NavigateToGamedbEntryRequested, this,
             &GMainWindow::OnGameListNavigateToGamedbEntry);
 
@@ -602,9 +622,9 @@ void GMainWindow::BootGame(const QString& filename) {
     std::string title_name;
     const auto res = Core::System::GetInstance().GetGameName(title_name);
     if (res != Loader::ResultStatus::Success) {
-        const u64 program_id = Core::System::GetInstance().CurrentProcess()->program_id;
+        const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
 
-        const auto [nacp, icon_file] = FileSys::PatchManager(program_id).GetControlMetadata();
+        const auto [nacp, icon_file] = FileSys::PatchManager(title_id).GetControlMetadata();
         if (nacp != nullptr)
             title_name = nacp->GetApplicationName();
 
@@ -713,6 +733,12 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target
                                                                 program_id, user_id, 0);
         break;
     }
+    case GameListOpenTarget::ModData: {
+        open_target = "Mod Data";
+        const auto load_dir = FileUtil::GetUserPath(FileUtil::UserPath::LoadDir);
+        path = fmt::format("{}{:016X}", load_dir, program_id);
+        break;
+    }
     default:
         UNIMPLEMENTED();
     }
@@ -730,6 +756,134 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target
     QDesktopServices::openUrl(QUrl::fromLocalFile(qpath));
 }
 
+/// Counts the progress steps a RomFS copy of `dir` takes: one per subdirectory (recursively),
+/// plus one per file when `full` is true.
+static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) {
+    std::size_t out = 0;
+
+    for (const auto& subdir : dir->GetSubdirectories()) {
+        out += 1 + CalculateRomFSEntrySize(subdir, full);
+    }
+
+    return out + (full ? dir->GetFiles().size() : 0);
+}
+
+/// Recursively copies `src` into `dest`, bumping `dialog` by one step per copied file and per
+/// completed subdirectory. Files are copied only when `full` is true (`block_size` is forwarded
+/// to FileSys::VfsRawCopy); otherwise only the directory tree is recreated. Returns false for a
+/// null/unreadable source, a null/unwritable destination, a failed copy, or a cancelled dialog.
+static bool RomFSRawCopy(QProgressDialog& dialog, const FileSys::VirtualDir& src,
+                         const FileSys::VirtualDir& dest, std::size_t block_size, bool full) {
+    if (src == nullptr || dest == nullptr || !src->IsReadable() || !dest->IsWritable())
+        return false;
+    if (dialog.wasCanceled())
+        return false;
+
+    if (full) {
+        for (const auto& file : src->GetFiles()) {
+            const auto out = VfsDirectoryCreateFileWrapper(dest, file->GetName());
+            if (!FileSys::VfsRawCopy(file, out, block_size))
+                return false;
+            dialog.setValue(dialog.value() + 1);
+            if (dialog.wasCanceled())
+                return false;
+        }
+    }
+
+    for (const auto& dir : src->GetSubdirectories()) {
+        const auto out = dest->CreateSubdirectory(dir->GetName());
+        if (!RomFSRawCopy(dialog, dir, out, block_size, full))
+            return false;
+        dialog.setValue(dialog.value() + 1);
+        if (dialog.wasCanceled())
+            return false;
+    }
+
+    return true;
+}
+
+/// Dumps the RomFS of the game at `game_path` (run through PatchManager::PatchRomFS when the
+/// loader reports IsRomFSUpdatable()) into "<DumpDir><program_id as 16 hex digits>/romfs", after
+/// asking the user whether to copy every file or only the directory skeleton.
+void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path) {
+    const auto path = fmt::format("{}{:016X}/romfs",
+                                  FileUtil::GetUserPath(FileUtil::UserPath::DumpDir), program_id);
+
+    // Shared failure path: warn the user and remove the output directory.
+    const auto failed = [this, &path] {
+        QMessageBox::warning(this, tr("RomFS Extraction Failed!"),
+                             tr("There was an error copying the RomFS files or the user "
+                                "cancelled the operation."));
+        vfs->DeleteDirectory(path);
+    };
+
+    const auto loader = Loader::GetLoader(vfs->OpenFile(game_path, FileSys::Mode::Read));
+    if (loader == nullptr) {
+        failed();
+        return;
+    }
+
+    FileSys::VirtualFile file;
+    if (loader->ReadRomFS(file) != Loader::ResultStatus::Success) {
+        failed();
+        return;
+    }
+
+    const auto romfs =
+        loader->IsRomFSUpdatable()
+            ? FileSys::PatchManager(program_id).PatchRomFS(file, loader->ReadRomFSIVFCOffset())
+            : file;
+
+    const auto extracted = FileSys::ExtractRomFS(romfs, FileSys::RomFSExtractionType::Full);
+    if (extracted == nullptr) {
+        failed();
+        return;
+    }
+
+    const auto out = VfsFilesystemCreateDirectoryWrapper(vfs, path, FileSys::Mode::ReadWrite);
+
+    if (out == nullptr) {
+        failed();
+        return;
+    }
+
+    bool ok = false;
+    const auto res = QInputDialog::getItem(
+        this, tr("Select RomFS Dump Mode"),
+        tr("Please select the how you would like the RomFS dumped.<br>Full will copy all of the "
+           "files into the new directory while <br>skeleton will only create the directory "
+           "structure."),
+        {"Full", "Skeleton"}, 0, false, &ok);
+    if (!ok) {
+        // The mode dialog was cancelled: stop here instead of falling through to the copy.
+        failed();
+        return;
+    }
+
+    const auto full = res == "Full";
+    const auto entry_size = CalculateRomFSEntrySize(extracted, full);
+
+    QProgressDialog progress(tr("Extracting RomFS..."), tr("Cancel"), 0, entry_size, this);
+    progress.setWindowModality(Qt::WindowModal);
+    progress.setMinimumDuration(100);
+
+    if (RomFSRawCopy(progress, extracted, out, 0x400000, full)) {
+        progress.close();
+        QMessageBox::information(this, tr("RomFS Extraction Succeeded!"),
+                                 tr("The operation completed successfully."));
+        QDesktopServices::openUrl(QUrl::fromLocalFile(QString::fromStdString(path)));
+    } else {
+        progress.close();
+        failed();
+    }
+}
+
+/// Puts `program_id`, formatted as 16 zero-padded uppercase hex digits, on the clipboard.
+void GMainWindow::OnGameListCopyTID(u64 program_id) {
+    QClipboard* clipboard = QGuiApplication::clipboard();
+    clipboard->setText(QString::fromStdString(fmt::format("{:016X}", program_id)));
+}
+
 void GMainWindow::OnGameListNavigateToGamedbEntry(u64 program_id,
                                                   const CompatibilityList& compatibility_list) {
     const auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id);
@@ -790,7 +930,8 @@ void GMainWindow::OnMenuInstallToNAND() {
         return;
     }
 
-    const auto qt_raw_copy = [this](FileSys::VirtualFile src, FileSys::VirtualFile dest) {
+    const auto qt_raw_copy = [this](const FileSys::VirtualFile& src,
+                                    const FileSys::VirtualFile& dest, std::size_t block_size) {
         if (src == nullptr || dest == nullptr)
             return false;
         if (!dest->Resize(src->GetSize()))
@@ -804,7 +945,7 @@ void GMainWindow::OnMenuInstallToNAND() {
             tr("Cancel"), 0, progress_maximum, this);
         progress.setWindowModality(Qt::WindowModal);
 
-        for (size_t i = 0; i < src->GetSize(); i += buffer.size()) {
+        for (std::size_t i = 0; i < src->GetSize(); i += buffer.size()) {
             if (progress.wasCanceled()) {
                 dest->Resize(0);
                 return false;
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 552e3e61c..8ee9242b1 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -138,6 +138,8 @@ private slots:
     /// Called whenever a user selects a game in the game list widget.
     void OnGameListLoadFile(QString game_path);
     void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target);
+    void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
+    void OnGameListCopyTID(u64 program_id);
     void OnGameListNavigateToGamedbEntry(u64 program_id,
                                          const CompatibilityList& compatibility_list);
     void OnMenuLoadFile();
diff --git a/src/yuzu/util/util.cpp b/src/yuzu/util/util.cpp
index e99042a23..62c080aff 100644
--- a/src/yuzu/util/util.cpp
+++ b/src/yuzu/util/util.cpp
@@ -30,8 +30,9 @@ QPixmap CreateCirclePixmapFromColor(const QColor& color) {
     QPixmap circle_pixmap(16, 16);
     circle_pixmap.fill(Qt::transparent);
     QPainter painter(&circle_pixmap);
+    painter.setRenderHint(QPainter::Antialiasing);
     painter.setPen(color);
     painter.setBrush(color);
-    painter.drawEllipse(0, 0, 15, 15);
+    painter.drawEllipse({circle_pixmap.width() / 2.0, circle_pixmap.height() / 2.0}, 7.0, 7.0);
     return circle_pixmap;
 }
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 991abda2e..a478b0a56 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -120,11 +120,15 @@ void Config::ReadValues() {
                           sdl2_config->Get("Data Storage", "nand_directory",
                                            FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
     FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir,
-                          sdl2_config->Get("Data Storage", "nand_directory",
+                          sdl2_config->Get("Data Storage", "sdmc_directory",
                                            FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
 
     // System
     Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false);
+    Settings::values.username = sdl2_config->Get("System", "username", "yuzu");
+    if (Settings::values.username.empty()) {
+        Settings::values.username = "yuzu";
+    }
 
     // Miscellaneous
     Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace");
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 002a4ec15..d35c441e9 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -176,7 +176,7 @@ use_docked_mode =
 
 # Sets the account username, max length is 32 characters
 # yuzu (default)
-username =
+username = yuzu
 
 # Sets the systems language index
 # 0: Japanese, 1: English (default), 2: French, 3: German, 4: Italian, 5: Spanish, 6: Chinese,
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index b1c364fbb..b2559b717 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -20,8 +20,10 @@
 #include "common/string_util.h"
 #include "common/telemetry.h"
 #include "core/core.h"
+#include "core/crypto/key_manager.h"
 #include "core/file_sys/vfs_real.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/service/filesystem/filesystem.h"
 #include "core/loader/loader.h"
 #include "core/settings.h"
 #include "core/telemetry_session.h"
@@ -29,7 +31,6 @@
 #include "yuzu_cmd/emu_window/emu_window_sdl2.h"
 
 #include <getopt.h>
-#include "core/crypto/key_manager.h"
 #ifndef _MSC_VER
 #include <unistd.h>
 #endif
@@ -169,6 +170,7 @@ int main(int argc, char** argv) {
 
     Core::System& system{Core::System::GetInstance()};
     system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>());
+    Service::FileSystem::CreateFactories(system.GetFilesystem());
 
     SCOPE_EXIT({ system.Shutdown(); });