201 files changed, 2181 insertions, 1090 deletions
diff --git a/.gitmodules b/.gitmodules
index 4f4e8690b..e73ca99e3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -31,3 +31,6 @@
 [submodule "opus"]
     path = externals/opus
     url = https://github.com/ogniK5377/opus.git
+[submodule "soundtouch"]
+	path = externals/soundtouch
+	url = https://github.com/citra-emu/ext-soundtouch.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0f32ecfba..500d099fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -431,6 +431,9 @@ enable_testing()
 add_subdirectory(externals)
 add_subdirectory(src)
 
+# Set yuzu project as default StartUp Project in Visual Studio
+set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT yuzu)
+
 
 # Installation instructions
 # =========================
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 3d8e10c2b..53dcf1f1a 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -50,6 +50,9 @@ add_subdirectory(open_source_archives EXCLUDE_FROM_ALL)
 add_library(unicorn-headers INTERFACE)
 target_include_directories(unicorn-headers INTERFACE ./unicorn/include)
 
+# SoundTouch
+add_subdirectory(soundtouch)
+
 # Xbyak
 if (ARCHITECTURE_x86_64)
     # Defined before "dynarmic" above
diff --git a/externals/soundtouch b/externals/soundtouch
new file mode 160000
+Subproject 060181eaf273180d3a7e87349895bd0cb6ccbf4
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 82e4850f7..c381dbe1d 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -17,6 +17,8 @@ add_library(audio_core STATIC
     sink_stream.h
     stream.cpp
     stream.h
+    time_stretch.cpp
+    time_stretch.h
 
     $<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h>
 )
@@ -24,6 +26,7 @@ add_library(audio_core STATIC
 create_target_directory_groups(audio_core)
 
 target_link_libraries(audio_core PUBLIC common core)
+target_link_libraries(audio_core PRIVATE SoundTouch)
 
 if(ENABLE_CUBEB)
     target_link_libraries(audio_core PRIVATE cubeb)
diff --git a/src/audio_core/algorithm/filter.cpp b/src/audio_core/algorithm/filter.cpp
index 9fcd0614d..f65bf64f7 100644
--- a/src/audio_core/algorithm/filter.cpp
+++ b/src/audio_core/algorithm/filter.cpp
@@ -35,12 +35,12 @@ Filter::Filter(double a0, double a1, double a2, double b0, double b1, double b2)
     : a1(a1 / a0), a2(a2 / a0), b0(b0 / a0), b1(b1 / a0), b2(b2 / a0) {}
 
 void Filter::Process(std::vector<s16>& signal) {
-    const size_t num_frames = signal.size() / 2;
-    for (size_t i = 0; i < num_frames; i++) {
+    const std::size_t num_frames = signal.size() / 2;
+    for (std::size_t i = 0; i < num_frames; i++) {
         std::rotate(in.begin(), in.end() - 1, in.end());
         std::rotate(out.begin(), out.end() - 1, out.end());
 
-        for (size_t ch = 0; ch < channel_count; ch++) {
+        for (std::size_t ch = 0; ch < channel_count; ch++) {
             in[0][ch] = signal[i * channel_count + ch];
 
             out[0][ch] = b0 * in[0][ch] + b1 * in[1][ch] + b2 * in[2][ch] - a1 * out[1][ch] -
@@ -54,14 +54,14 @@ void Filter::Process(std::vector<s16>& signal) {
 /// Calculates the appropriate Q for each biquad in a cascading filter.
 /// @param total_count The total number of biquads to be cascaded.
 /// @param index 0-index of the biquad to calculate the Q value for.
-static double CascadingBiquadQ(size_t total_count, size_t index) {
+static double CascadingBiquadQ(std::size_t total_count, std::size_t index) {
     const double pole = M_PI * (2 * index + 1) / (4.0 * total_count);
     return 1.0 / (2.0 * std::cos(pole));
 }
 
-CascadingFilter CascadingFilter::LowPass(double cutoff, size_t cascade_size) {
+CascadingFilter CascadingFilter::LowPass(double cutoff, std::size_t cascade_size) {
     std::vector<Filter> cascade(cascade_size);
-    for (size_t i = 0; i < cascade_size; i++) {
+    for (std::size_t i = 0; i < cascade_size; i++) {
         cascade[i] = Filter::LowPass(cutoff, CascadingBiquadQ(cascade_size, i));
     }
     return CascadingFilter{std::move(cascade)};
diff --git a/src/audio_core/algorithm/filter.h b/src/audio_core/algorithm/filter.h
index a41beef98..3546d149b 100644
--- a/src/audio_core/algorithm/filter.h
+++ b/src/audio_core/algorithm/filter.h
@@ -30,7 +30,7 @@ public:
     void Process(std::vector<s16>& signal);
 
 private:
-    static constexpr size_t channel_count = 2;
+    static constexpr std::size_t channel_count = 2;
 
     /// Coefficients are in normalized form (a0 = 1.0).
     double a1, a2, b0, b1, b2;
@@ -46,7 +46,7 @@ public:
     /// Creates a cascading low-pass filter.
     /// @param cutoff Determines the cutoff frequency. A value from 0.0 to 1.0.
     /// @param cascade_size Number of biquads in cascade.
-    static CascadingFilter LowPass(double cutoff, size_t cascade_size);
+    static CascadingFilter LowPass(double cutoff, std::size_t cascade_size);
 
     /// Passthrough.
     CascadingFilter();
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp
index 11459821f..3aea9b0f2 100644
--- a/src/audio_core/algorithm/interpolate.cpp
+++ b/src/audio_core/algorithm/interpolate.cpp
@@ -14,7 +14,7 @@
 namespace AudioCore {
 
 /// The Lanczos kernel
-static double Lanczos(size_t a, double x) {
+static double Lanczos(std::size_t a, double x) {
     if (x == 0.0)
         return 1.0;
     const double px = M_PI * x;
@@ -37,15 +37,15 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,
     }
     state.nyquist.Process(input);
 
-    constexpr size_t taps = InterpolationState::lanczos_taps;
-    const size_t num_frames = input.size() / 2;
+    constexpr std::size_t taps = InterpolationState::lanczos_taps;
+    const std::size_t num_frames = input.size() / 2;
 
     std::vector<s16> output;
-    output.reserve(static_cast<size_t>(input.size() / ratio + 4));
+    output.reserve(static_cast<std::size_t>(input.size() / ratio + 4));
 
     double& pos = state.position;
     auto& h = state.history;
-    for (size_t i = 0; i < num_frames; ++i) {
+    for (std::size_t i = 0; i < num_frames; ++i) {
         std::rotate(h.begin(), h.end() - 1, h.end());
         h[0][0] = input[i * 2 + 0];
         h[0][1] = input[i * 2 + 1];
@@ -53,7 +53,7 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,
         while (pos <= 1.0) {
             double l = 0.0;
             double r = 0.0;
-            for (size_t j = 0; j < h.size(); j++) {
+            for (std::size_t j = 0; j < h.size(); j++) {
                 l += Lanczos(taps, pos + j - taps + 1) * h[j][0];
                 r += Lanczos(taps, pos + j - taps + 1) * h[j][1];
             }
diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h
index c79c2eef4..edbd6460f 100644
--- a/src/audio_core/algorithm/interpolate.h
+++ b/src/audio_core/algorithm/interpolate.h
@@ -12,8 +12,8 @@
 namespace AudioCore {
 
 struct InterpolationState {
-    static constexpr size_t lanczos_taps = 4;
-    static constexpr size_t history_size = lanczos_taps * 2 - 1;
+    static constexpr std::size_t lanczos_taps = 4;
+    static constexpr std::size_t history_size = lanczos_taps * 2 - 1;
 
     double current_ratio = 0.0;
     CascadingFilter nyquist;
diff --git a/src/audio_core/audio_out.cpp b/src/audio_core/audio_out.cpp
index 12632a95c..0c8f5b18e 100644
--- a/src/audio_core/audio_out.cpp
+++ b/src/audio_core/audio_out.cpp
@@ -39,7 +39,8 @@ StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&&
         sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
 }
 
-std::vector<Buffer::Tag> AudioOut::GetTagsAndReleaseBuffers(StreamPtr stream, size_t max_count) {
+std::vector<Buffer::Tag> AudioOut::GetTagsAndReleaseBuffers(StreamPtr stream,
+                                                            std::size_t max_count) {
     return stream->GetTagsAndReleaseBuffers(max_count);
 }
 
diff --git a/src/audio_core/audio_out.h b/src/audio_core/audio_out.h
index 39b7e656b..df9607ac7 100644
--- a/src/audio_core/audio_out.h
+++ b/src/audio_core/audio_out.h
@@ -25,7 +25,7 @@ public:
                          Stream::ReleaseCallback&& release_callback);
 
     /// Returns a vector of recently released buffers specified by tag for the specified stream
-    std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, size_t max_count);
+    std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);
 
     /// Starts an audio stream for playback
     void StartStream(StreamPtr stream);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index a75cd3be5..ed3b7defc 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -52,8 +52,8 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
                 memory_pool_count * sizeof(MemoryPoolInfo));
 
     // Copy VoiceInfo structs
-    size_t offset{sizeof(UpdateDataHeader) + config.behavior_size + config.memory_pools_size +
-                  config.voice_resource_size};
+    std::size_t offset{sizeof(UpdateDataHeader) + config.behavior_size + config.memory_pools_size +
+                       config.voice_resource_size};
     for (auto& voice : voices) {
         std::memcpy(&voice.Info(), input_params.data() + offset, sizeof(VoiceInfo));
         offset += sizeof(VoiceInfo);
@@ -72,7 +72,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
 
     // Update memory pool state
     std::vector<MemoryPoolEntry> memory_pool(memory_pool_count);
-    for (size_t index = 0; index < memory_pool.size(); ++index) {
+    for (std::size_t index = 0; index < memory_pool.size(); ++index) {
         if (mem_pool_info[index].pool_state == MemoryPoolStates::RequestAttach) {
             memory_pool[index].state = MemoryPoolStates::Attached;
         } else if (mem_pool_info[index].pool_state == MemoryPoolStates::RequestDetach) {
@@ -93,7 +93,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
                 response_data.memory_pools_size);
 
     // Copy output voice status
-    size_t voice_out_status_offset{sizeof(UpdateDataHeader) + response_data.memory_pools_size};
+    std::size_t voice_out_status_offset{sizeof(UpdateDataHeader) + response_data.memory_pools_size};
     for (const auto& voice : voices) {
         std::memcpy(output_params.data() + voice_out_status_offset, &voice.GetOutStatus(),
                     sizeof(VoiceOutStatus));
@@ -103,12 +103,12 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
     return output_params;
 }
 
-void AudioRenderer::VoiceState::SetWaveIndex(size_t index) {
+void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) {
     wave_index = index & 3;
     is_refresh_pending = true;
 }
 
-std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(size_t sample_count) {
+std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count) {
     if (!IsPlaying()) {
         return {};
     }
@@ -117,9 +117,9 @@ std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(size_t sample_count)
         RefreshBuffer();
     }
 
-    const size_t max_size{samples.size() - offset};
-    const size_t dequeue_offset{offset};
-    size_t size{sample_count * STREAM_NUM_CHANNELS};
+    const std::size_t max_size{samples.size() - offset};
+    const std::size_t dequeue_offset{offset};
+    std::size_t size{sample_count * STREAM_NUM_CHANNELS};
     if (size > max_size) {
         size = max_size;
     }
@@ -184,7 +184,7 @@ void AudioRenderer::VoiceState::RefreshBuffer() {
     case 1:
         // 1 channel is upsampled to 2 channel
         samples.resize(new_samples.size() * 2);
-        for (size_t index = 0; index < new_samples.size(); ++index) {
+        for (std::size_t index = 0; index < new_samples.size(); ++index) {
             samples[index * 2] = new_samples[index];
             samples[index * 2 + 1] = new_samples[index];
         }
@@ -210,7 +210,7 @@ static constexpr s16 ClampToS16(s32 value) {
 }
 
 void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
-    constexpr size_t BUFFER_SIZE{512};
+    constexpr std::size_t BUFFER_SIZE{512};
     std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels());
 
     for (auto& voice : voices) {
@@ -218,7 +218,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
             continue;
         }
 
-        size_t offset{};
+        std::size_t offset{};
         s64 samples_remaining{BUFFER_SIZE};
         while (samples_remaining > 0) {
             const std::vector<s16> samples{voice.DequeueSamples(samples_remaining)};
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 6d069d693..c8d2cd188 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -184,16 +184,16 @@ private:
             return info;
         }
 
-        void SetWaveIndex(size_t index);
-        std::vector<s16> DequeueSamples(size_t sample_count);
+        void SetWaveIndex(std::size_t index);
+        std::vector<s16> DequeueSamples(std::size_t sample_count);
         void UpdateState();
         void RefreshBuffer();
 
     private:
         bool is_in_use{};
         bool is_refresh_pending{};
-        size_t wave_index{};
-        size_t offset{};
+        std::size_t wave_index{};
+        std::size_t offset{};
         Codec::ADPCMState adpcm_state{};
         InterpolationState interp_state{};
         std::vector<s16> samples;
diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp
index c3021403f..454de798b 100644
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -8,27 +8,27 @@
 
 namespace AudioCore::Codec {
 
-std::vector<s16> DecodeADPCM(const u8* const data, size_t size, const ADPCM_Coeff& coeff,
+std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM_Coeff& coeff,
                              ADPCMState& state) {
     // GC-ADPCM with scale factor and variable coefficients.
     // Frames are 8 bytes long containing 14 samples each.
     // Samples are 4 bits (one nibble) long.
 
-    constexpr size_t FRAME_LEN = 8;
-    constexpr size_t SAMPLES_PER_FRAME = 14;
+    constexpr std::size_t FRAME_LEN = 8;
+    constexpr std::size_t SAMPLES_PER_FRAME = 14;
     constexpr std::array<int, 16> SIGNED_NIBBLES = {
         {0, 1, 2, 3, 4, 5, 6, 7, -8, -7, -6, -5, -4, -3, -2, -1}};
 
-    const size_t sample_count = (size / FRAME_LEN) * SAMPLES_PER_FRAME;
-    const size_t ret_size =
+    const std::size_t sample_count = (size / FRAME_LEN) * SAMPLES_PER_FRAME;
+    const std::size_t ret_size =
         sample_count % 2 == 0 ? sample_count : sample_count + 1; // Ensure multiple of two.
     std::vector<s16> ret(ret_size);
 
     int yn1 = state.yn1, yn2 = state.yn2;
 
-    const size_t NUM_FRAMES =
+    const std::size_t NUM_FRAMES =
         (sample_count + (SAMPLES_PER_FRAME - 1)) / SAMPLES_PER_FRAME; // Round up.
-    for (size_t framei = 0; framei < NUM_FRAMES; framei++) {
+    for (std::size_t framei = 0; framei < NUM_FRAMES; framei++) {
         const int frame_header = data[framei * FRAME_LEN];
         const int scale = 1 << (frame_header & 0xF);
         const int idx = (frame_header >> 4) & 0x7;
@@ -53,9 +53,9 @@ std::vector<s16> DecodeADPCM(const u8* const data, size_t size, const ADPCM_Coef
             return static_cast<s16>(val);
         };
 
-        size_t outputi = framei * SAMPLES_PER_FRAME;
-        size_t datai = framei * FRAME_LEN + 1;
-        for (size_t i = 0; i < SAMPLES_PER_FRAME && outputi < sample_count; i += 2) {
+        std::size_t outputi = framei * SAMPLES_PER_FRAME;
+        std::size_t datai = framei * FRAME_LEN + 1;
+        for (std::size_t i = 0; i < SAMPLES_PER_FRAME && outputi < sample_count; i += 2) {
             const s16 sample1 = decode_sample(SIGNED_NIBBLES[data[datai] >> 4]);
             ret[outputi] = sample1;
             outputi++;
diff --git a/src/audio_core/codec.h b/src/audio_core/codec.h
index 3f845c42c..ef2ce01a8 100644
--- a/src/audio_core/codec.h
+++ b/src/audio_core/codec.h
@@ -38,7 +38,7 @@ using ADPCM_Coeff = std::array<s16, 16>;
  * @param state ADPCM state, this is updated with new state
  * @return Decoded stereo signed PCM16 data, sample_count in length
  */
-std::vector<s16> DecodeADPCM(const u8* const data, size_t size, const ADPCM_Coeff& coeff,
+std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM_Coeff& coeff,
                              ADPCMState& state);
 
 }; // namespace AudioCore::Codec
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 5a1177d0c..392039688 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -3,27 +3,23 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <atomic>
 #include <cstring>
-#include <mutex>
-
 #include "audio_core/cubeb_sink.h"
 #include "audio_core/stream.h"
+#include "audio_core/time_stretch.h"
 #include "common/logging/log.h"
+#include "common/ring_buffer.h"
+#include "core/settings.h"
 
 namespace AudioCore {
 
-class SinkStreamImpl final : public SinkStream {
+class CubebSinkStream final : public SinkStream {
 public:
-    SinkStreamImpl(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
-                   const std::string& name)
-        : ctx{ctx}, num_channels{num_channels_} {
-
-        if (num_channels == 6) {
-            // 6-channel audio does not seem to work with cubeb + SDL, so we downsample this to 2
-            // channel for now
-            is_6_channel = true;
-            num_channels = 2;
-        }
+    CubebSinkStream(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
+                    const std::string& name)
+        : ctx{ctx}, num_channels{std::min(num_channels_, 2u)}, time_stretch{sample_rate,
+                                                                            num_channels} {
 
         cubeb_stream_params params{};
         params.rate = sample_rate;
@@ -38,7 +34,7 @@ public:
 
         if (cubeb_stream_init(ctx, &stream_backend, name.c_str(), nullptr, nullptr, output_device,
                               &params, std::max(512u, minimum_latency),
-                              &SinkStreamImpl::DataCallback, &SinkStreamImpl::StateCallback,
+                              &CubebSinkStream::DataCallback, &CubebSinkStream::StateCallback,
                               this) != CUBEB_OK) {
             LOG_CRITICAL(Audio_Sink, "Error initializing cubeb stream");
             return;
@@ -50,7 +46,7 @@ public:
         }
     }
 
-    ~SinkStreamImpl() {
+    ~CubebSinkStream() {
         if (!ctx) {
             return;
         }
@@ -62,27 +58,32 @@ public:
         cubeb_stream_destroy(stream_backend);
     }
 
-    void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) override {
-        if (!ctx) {
+    void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override {
+        if (source_num_channels > num_channels) {
+            // Drop extra channels, keeping the first num_channels of each frame (e.g. 6 -> 2)
+            std::vector<s16> buf;
+            buf.reserve(samples.size() * num_channels / source_num_channels);
+            for (std::size_t i = 0; i < samples.size(); i += source_num_channels) {
+                for (std::size_t ch = 0; ch < num_channels; ch++) {
+                    buf.push_back(samples[i + ch]);
+                }
+            }
+            queue.Push(buf);
             return;
         }
 
-        std::lock_guard lock{queue_mutex};
+        queue.Push(samples);
+    }
 
-        queue.reserve(queue.size() + samples.size() * GetNumChannels());
+    std::size_t SamplesInQueue(u32 num_channels) const override {
+        if (!ctx)
+            return 0;
 
-        if (is_6_channel) {
-            // Downsample 6 channels to 2
-            const size_t sample_count_copy_size = samples.size() * 2;
-            queue.reserve(sample_count_copy_size);
-            for (size_t i = 0; i < samples.size(); i += num_channels) {
-                queue.push_back(samples[i]);
-                queue.push_back(samples[i + 1]);
-            }
-        } else {
-            // Copy as-is
-            std::copy(samples.begin(), samples.end(), std::back_inserter(queue));
-        }
+        return queue.Size() / num_channels;
+    }
+
+    void Flush() override {
+        should_flush = true;
     }
 
     u32 GetNumChannels() const {
@@ -95,10 +96,11 @@ private:
     cubeb* ctx{};
     cubeb_stream* stream_backend{};
     u32 num_channels{};
-    bool is_6_channel{};
 
-    std::mutex queue_mutex;
-    std::vector<s16> queue;
+    Common::RingBuffer<s16, 0x10000> queue;
+    std::array<s16, 2> last_frame{}; // zeroed: pads underruns before any output exists
+    std::atomic<bool> should_flush{};
+    TimeStretcher time_stretch;
 
     static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
                              void* output_buffer, long num_frames);
@@ -117,10 +119,10 @@ CubebSink::CubebSink(std::string target_device_name) {
             LOG_WARNING(Audio_Sink, "Audio output device enumeration not supported");
         } else {
             const auto collection_end{collection.device + collection.count};
-            const auto device{std::find_if(collection.device, collection_end,
-                                           [&](const cubeb_device_info& device) {
-                                               return target_device_name == device.friendly_name;
-                                           })};
+            const auto device{
+                std::find_if(collection.device, collection_end, [&](const cubeb_device_info& info) {
+                    return target_device_name == info.friendly_name;
+                })};
             if (device != collection_end) {
                 output_device = device->devid;
             }
@@ -144,44 +146,59 @@ CubebSink::~CubebSink() {
 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
                                          const std::string& name) {
     sink_streams.push_back(
-        std::make_unique<SinkStreamImpl>(ctx, sample_rate, num_channels, output_device, name));
+        std::make_unique<CubebSinkStream>(ctx, sample_rate, num_channels, output_device, name));
     return *sink_streams.back();
 }
 
-long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
-                                  void* output_buffer, long num_frames) {
-    SinkStreamImpl* impl = static_cast<SinkStreamImpl*>(user_data);
+long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
+                                   void* output_buffer, long num_frames) {
+    CubebSinkStream* impl = static_cast<CubebSinkStream*>(user_data);
     u8* buffer = reinterpret_cast<u8*>(output_buffer);
 
     if (!impl) {
         return {};
     }
 
-    std::lock_guard lock{impl->queue_mutex};
-
-    const size_t frames_to_write{
-        std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))};
+    const std::size_t num_channels = impl->GetNumChannels();
+    const std::size_t samples_to_write = num_channels * num_frames;
+    std::size_t samples_written;
+
+    if (Settings::values.enable_audio_stretching) {
+        const std::vector<s16> in{impl->queue.Pop()};
+        const std::size_t num_in{in.size() / num_channels};
+        s16* const out{reinterpret_cast<s16*>(buffer)};
+        const std::size_t out_frames =
+            impl->time_stretch.Process(in.data(), num_in, out, num_frames);
+        samples_written = out_frames * num_channels;
+
+        if (impl->should_flush) {
+            impl->time_stretch.Flush();
+            impl->should_flush = false;
+        }
+    } else {
+        samples_written = impl->queue.Pop(buffer, samples_to_write);
+    }
 
-    memcpy(buffer, impl->queue.data(), frames_to_write * sizeof(s16) * impl->GetNumChannels());
-    impl->queue.erase(impl->queue.begin(),
-                      impl->queue.begin() + frames_to_write * impl->GetNumChannels());
+    if (samples_written >= num_channels) {
+        std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
+                    num_channels * sizeof(s16));
+    }
 
-    if (frames_to_write < num_frames) {
-        // Fill the rest of the frames with silence
-        memset(buffer + frames_to_write * sizeof(s16) * impl->GetNumChannels(), 0,
-               (num_frames - frames_to_write) * sizeof(s16) * impl->GetNumChannels());
+    // Fill the rest of the frames with last_frame
+    for (std::size_t i = samples_written; i < samples_to_write; i += num_channels) {
+        std::memcpy(buffer + i * sizeof(s16), &impl->last_frame[0], num_channels * sizeof(s16));
     }
 
     return num_frames;
 }
 
-void SinkStreamImpl::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
+void CubebSinkStream::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
 
 std::vector<std::string> ListCubebSinkDevices() {
     std::vector<std::string> device_list;
     cubeb* ctx;
 
-    if (cubeb_init(&ctx, "Citra Device Enumerator", nullptr) != CUBEB_OK) {
+    if (cubeb_init(&ctx, "yuzu Device Enumerator", nullptr) != CUBEB_OK) {
         LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
         return {};
     }
@@ -190,7 +207,7 @@ std::vector<std::string> ListCubebSinkDevices() {
     if (cubeb_enumerate_devices(ctx, CUBEB_DEVICE_TYPE_OUTPUT, &collection) != CUBEB_OK) {
         LOG_WARNING(Audio_Sink, "Audio output device enumeration not supported");
     } else {
-        for (size_t i = 0; i < collection.count; i++) {
+        for (std::size_t i = 0; i < collection.count; i++) {
             const cubeb_device_info& device = collection.device[i];
             if (device.friendly_name) {
                 device_list.emplace_back(device.friendly_name);
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
index f235d93e5..a78d78893 100644
--- a/src/audio_core/null_sink.h
+++ b/src/audio_core/null_sink.h
@@ -21,6 +21,12 @@ public:
 private:
     struct NullSinkStreamImpl final : SinkStream {
         void EnqueueSamples(u32 /*num_channels*/, const std::vector<s16>& /*samples*/) override {}
+
+        std::size_t SamplesInQueue(u32 /*num_channels*/) const override {
+            return 0;
+        }
+
+        void Flush() override {}
     } null_sink_stream;
 };
 
diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp
index 955ba20fb..67cf1f3b2 100644
--- a/src/audio_core/sink_details.cpp
+++ b/src/audio_core/sink_details.cpp
@@ -24,7 +24,7 @@ const std::vector<SinkDetails> g_sink_details = {
                 [] { return std::vector<std::string>{"null"}; }},
 };
 
-const SinkDetails& GetSinkDetails(std::string sink_id) {
+const SinkDetails& GetSinkDetails(std::string_view sink_id) {
     auto iter =
         std::find_if(g_sink_details.begin(), g_sink_details.end(),
                      [sink_id](const auto& sink_detail) { return sink_detail.id == sink_id; });
diff --git a/src/audio_core/sink_details.h b/src/audio_core/sink_details.h
index ea666c554..03534b187 100644
--- a/src/audio_core/sink_details.h
+++ b/src/audio_core/sink_details.h
@@ -6,6 +6,8 @@
 
 #include <functional>
 #include <memory>
+#include <string>
+#include <string_view>
 #include <utility>
 #include <vector>
 
@@ -30,6 +32,6 @@ struct SinkDetails {
 
 extern const std::vector<SinkDetails> g_sink_details;
 
-const SinkDetails& GetSinkDetails(std::string sink_id);
+const SinkDetails& GetSinkDetails(std::string_view sink_id);
 
 } // namespace AudioCore
diff --git a/src/audio_core/sink_stream.h b/src/audio_core/sink_stream.h
index 41b6736d8..4309ad094 100644
--- a/src/audio_core/sink_stream.h
+++ b/src/audio_core/sink_stream.h
@@ -25,6 +25,10 @@ public:
      * @param samples Samples in interleaved stereo PCM16 format.
      */
     virtual void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) = 0;
+
+    virtual std::size_t SamplesInQueue(u32 num_channels) const = 0;
+
+    virtual void Flush() = 0;
 };
 
 using SinkStreamPtr = std::unique_ptr<SinkStream>;
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index dbae75d8c..386f2ec66 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -17,7 +17,7 @@
 
 namespace AudioCore {
 
-constexpr size_t MaxAudioBufferCount{32};
+constexpr std::size_t MaxAudioBufferCount{32};
 
 u32 Stream::GetNumChannels() const {
     switch (format) {
@@ -52,7 +52,7 @@ void Stream::Stop() {
 }
 
 s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
-    const size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
+    const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
     return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
 }
 
@@ -73,6 +73,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
 void Stream::PlayNextBuffer() {
     if (!IsPlaying()) {
         // Ensure we are in playing state before playing the next buffer
+        sink_stream.Flush();
         return;
     }
 
@@ -83,6 +84,7 @@ void Stream::PlayNextBuffer() {
 
     if (queued_buffers.empty()) {
         // No queued buffers - we are effectively paused
+        sink_stream.Flush();
         return;
     }
 
@@ -90,6 +92,7 @@ void Stream::PlayNextBuffer() {
     queued_buffers.pop();
 
     VolumeAdjustSamples(active_buffer->Samples());
+
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
     CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
@@ -119,9 +122,9 @@ bool Stream::ContainsBuffer(Buffer::Tag tag) const {
     return {};
 }
 
-std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(size_t max_count) {
+std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(std::size_t max_count) {
     std::vector<Buffer::Tag> tags;
-    for (size_t count = 0; count < max_count && !released_buffers.empty(); ++count) {
+    for (std::size_t count = 0; count < max_count && !released_buffers.empty(); ++count) {
         tags.push_back(released_buffers.front()->GetTag());
         released_buffers.pop();
     }
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index 049b92ca9..3a435982d 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -49,7 +49,7 @@ public:
     bool ContainsBuffer(Buffer::Tag tag) const;
 
     /// Returns a vector of recently released buffers specified by tag
-    std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(size_t max_count);
+    std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(std::size_t max_count);
 
     /// Returns true if the stream is currently playing
     bool IsPlaying() const {
@@ -57,7 +57,7 @@ public:
     }
 
     /// Returns the number of queued buffers
-    size_t GetQueueSize() const {
+    std::size_t GetQueueSize() const {
         return queued_buffers.size();
     }
 
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
new file mode 100644
index 000000000..fc14151da
--- /dev/null
+++ b/src/audio_core/time_stretch.cpp
@@ -0,0 +1,69 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include "audio_core/time_stretch.h"
+#include "common/logging/log.h"
+
+namespace AudioCore {
+
+TimeStretcher::TimeStretcher(u32 sample_rate, u32 channel_count)
+    : m_sample_rate{sample_rate}, m_channel_count{channel_count} {
+    m_sound_touch.setChannels(m_channel_count);
+    m_sound_touch.setSampleRate(m_sample_rate);
+    m_sound_touch.setPitch(1.0);
+    m_sound_touch.setTempo(1.0); // same starting value as m_stretch_ratio
+}
+
+void TimeStretcher::Clear() {
+    m_sound_touch.clear(); // delegates to soundtouch::SoundTouch::clear()
+}
+
+void TimeStretcher::Flush() {
+    m_sound_touch.flush(); // delegates to soundtouch::SoundTouch::flush()
+}
+
+/// Queues `in`, retunes the tempo from the backlog level and reads up to num_out frames.
+std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,
+                                   std::size_t num_out) {
+    const double time_delta = static_cast<double>(num_out) / m_sample_rate; // seconds
+
+    // We were given num_in frames, and num_out frames were requested from us. If nothing was
+    // requested, keep the smoothed ratio: x / 0 would leave inf/NaN stuck in the filter below.
+    double current_ratio =
+        num_out != 0 ? static_cast<double>(num_in) / static_cast<double>(num_out) : m_stretch_ratio;
+
+    const double max_latency = 1.0; // seconds
+    const double max_backlog = m_sample_rate * max_latency;
+    const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
+    if (backlog_fullness > 5.0) {
+        num_in = 0; // Too many samples in backlog: don't push any more on
+    }
+
+    // Ideally the backlog is about 50% full, leaving headroom both ways to prevent underflow
+    // and overflow. We tweak current_ratio to encourage this.
+    constexpr double tweak_time_scale = 0.05; // seconds
+    const double tweak_correction = (backlog_fullness - 0.5) * (time_delta / tweak_time_scale);
+    current_ratio *= std::pow(1.0 + 2.0 * tweak_correction, tweak_correction < 0 ? 3.0 : 1.0);
+
+    // This low-pass filter smooths out variance in the calculated stretch ratio.
+    // The time-scale determines how responsive this filter is.
+    constexpr double lpf_time_scale = 2.0; // seconds
+    const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
+    m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
+
+    // Place a lower limit of 5% speed.  When a game boots up, there will be
+    // many silence samples.  These do not need to be timestretched.
+    m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
+    m_sound_touch.setTempo(m_stretch_ratio);
+
+    LOG_DEBUG(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, m_stretch_ratio,
+              backlog_fullness);
+    m_sound_touch.putSamples(in, static_cast<u32>(num_in));
+    return m_sound_touch.receiveSamples(out, static_cast<u32>(num_out));
+}
+
+} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
new file mode 100644
index 000000000..c2286fba1
--- /dev/null
+++ b/src/audio_core/time_stretch.h
@@ -0,0 +1,36 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <SoundTouch.h>
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+class TimeStretcher { // SoundTouch wrapper that adapts the tempo to its sample backlog
+public:
+    TimeStretcher(u32 sample_rate, u32 channel_count); // configures the SoundTouch instance
+
+    /// @param in       Input sample buffer
+    /// @param num_in   Number of input frames in `in`
+    /// @param out      Output sample buffer
+    /// @param num_out  Desired number of output frames in `out`
+    /// @returns Actual number of frames written to `out`
+    std::size_t Process(const s16* in, std::size_t num_in, s16* out, std::size_t num_out);
+
+    void Clear(); // calls clear() on the wrapped SoundTouch object
+
+    void Flush(); // calls flush() on the wrapped SoundTouch object
+
+private:
+    u32 m_sample_rate;   // converts frame counts to seconds and sizes the backlog in Process
+    u32 m_channel_count; // stored by the constructor; not read in time_stretch.cpp
+    soundtouch::SoundTouch m_sound_touch;
+    double m_stretch_ratio = 1.0; // smoothed tempo passed to SoundTouch::setTempo by Process
+};
+
+} // namespace AudioCore
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index f41946cc6..6a3f1fe08 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -71,6 +71,7 @@ add_library(common STATIC
     param_package.cpp
     param_package.h
     quaternion.h
+    ring_buffer.h
     scm_rev.cpp
     scm_rev.h
     scope_exit.h
diff --git a/src/common/alignment.h b/src/common/alignment.h
index b9dd38746..225770fab 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -8,13 +8,13 @@
 namespace Common {
 
 template <typename T>
-constexpr T AlignUp(T value, size_t size) {
+constexpr T AlignUp(T value, std::size_t size) {
     static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
     return static_cast<T>(value + (size - value % size) % size);
 }
 
 template <typename T>
-constexpr T AlignDown(T value, size_t size) {
+constexpr T AlignDown(T value, std::size_t size) {
     static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
     return static_cast<T>(value - value % size);
 }
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 732201de7..bf803da8d 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -129,8 +129,8 @@ private:
 
 public:
     /// Constants to allow limited introspection of fields if needed
-    static constexpr size_t position = Position;
-    static constexpr size_t bits = Bits;
+    static constexpr std::size_t position = Position;
+    static constexpr std::size_t bits = Bits;
     static constexpr StorageType mask = (((StorageTypeU)~0) >> (8 * sizeof(T) - bits)) << position;
 
     /**
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
index 5a197d8c1..5cd1352b2 100644
--- a/src/common/bit_set.h
+++ b/src/common/bit_set.h
@@ -170,14 +170,14 @@ public:
             m_val |= (IntTy)1 << bit;
     }
 
-    static BitSet AllTrue(size_t count) {
+    static BitSet AllTrue(std::size_t count) {
         return BitSet(count == sizeof(IntTy) * 8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
     }
 
-    Ref operator[](size_t bit) {
+    Ref operator[](std::size_t bit) {
         return Ref(this, (IntTy)1 << bit);
     }
-    const Ref operator[](size_t bit) const {
+    const Ref operator[](std::size_t bit) const {
         return (*const_cast<BitSet*>(this))[bit];
     }
     bool operator==(BitSet other) const {
diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp
index de31ffbd8..4e1d874b5 100644
--- a/src/common/cityhash.cpp
+++ b/src/common/cityhash.cpp
@@ -114,7 +114,7 @@ static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
     return b;
 }
 
-static uint64 HashLen0to16(const char* s, size_t len) {
+static uint64 HashLen0to16(const char* s, std::size_t len) {
     if (len >= 8) {
         uint64 mul = k2 + len * 2;
         uint64 a = Fetch64(s) + k2;
@@ -141,7 +141,7 @@ static uint64 HashLen0to16(const char* s, size_t len) {
 
 // This probably works well for 16-byte strings as well, but it may be overkill
 // in that case.
-static uint64 HashLen17to32(const char* s, size_t len) {
+static uint64 HashLen17to32(const char* s, std::size_t len) {
     uint64 mul = k2 + len * 2;
     uint64 a = Fetch64(s) * k1;
     uint64 b = Fetch64(s + 8);
@@ -170,7 +170,7 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint
 }
 
 // Return an 8-byte hash for 33 to 64 bytes.
-static uint64 HashLen33to64(const char* s, size_t len) {
+static uint64 HashLen33to64(const char* s, std::size_t len) {
     uint64 mul = k2 + len * 2;
     uint64 a = Fetch64(s) * k2;
     uint64 b = Fetch64(s + 8);
@@ -191,7 +191,7 @@ static uint64 HashLen33to64(const char* s, size_t len) {
     return b + x;
 }
 
-uint64 CityHash64(const char* s, size_t len) {
+uint64 CityHash64(const char* s, std::size_t len) {
     if (len <= 32) {
         if (len <= 16) {
             return HashLen0to16(s, len);
@@ -212,7 +212,7 @@ uint64 CityHash64(const char* s, size_t len) {
     x = x * k1 + Fetch64(s);
 
     // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
-    len = (len - 1) & ~static_cast<size_t>(63);
+    len = (len - 1) & ~static_cast<std::size_t>(63);
     do {
         x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
         y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
@@ -229,17 +229,17 @@ uint64 CityHash64(const char* s, size_t len) {
                      HashLen16(v.second, w.second) + x);
 }
 
-uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
+uint64 CityHash64WithSeed(const char* s, std::size_t len, uint64 seed) {
     return CityHash64WithSeeds(s, len, k2, seed);
 }
 
-uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) {
+uint64 CityHash64WithSeeds(const char* s, std::size_t len, uint64 seed0, uint64 seed1) {
     return HashLen16(CityHash64(s, len) - seed0, seed1);
 }
 
 // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
 // of any length representable in signed long.  Based on City and Murmur.
-static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
+static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) {
     uint64 a = Uint128Low64(seed);
     uint64 b = Uint128High64(seed);
     uint64 c = 0;
@@ -269,7 +269,7 @@ static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
     return uint128(a ^ b, HashLen16(b, a));
 }
 
-uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
+uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
     if (len < 128) {
         return CityMurmur(s, len, seed);
     }
@@ -313,7 +313,7 @@ uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
     w.first *= 9;
     v.first *= k0;
     // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
-    for (size_t tail_done = 0; tail_done < len;) {
+    for (std::size_t tail_done = 0; tail_done < len;) {
         tail_done += 32;
         y = Rotate(x + y, 42) * k0 + v.second;
         w.first += Fetch64(s + len - tail_done + 16);
@@ -331,7 +331,7 @@ uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
     return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second));
 }
 
-uint128 CityHash128(const char* s, size_t len) {
+uint128 CityHash128(const char* s, std::size_t len) {
     return len >= 16
                ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0))
                : CityHash128WithSeed(s, len, uint128(k0, k1));
diff --git a/src/common/cityhash.h b/src/common/cityhash.h
index bcebdb150..4b94f8e18 100644
--- a/src/common/cityhash.h
+++ b/src/common/cityhash.h
@@ -63,7 +63,7 @@
 
 #include <utility>
 #include <stdint.h>
-#include <stdlib.h> // for size_t.
+#include <stdlib.h> // for std::size_t.
 
 namespace Common {
 
@@ -77,22 +77,22 @@ inline uint64_t Uint128High64(const uint128& x) {
 }
 
 // Hash function for a byte array.
-uint64_t CityHash64(const char* buf, size_t len);
+uint64_t CityHash64(const char* buf, std::size_t len);
 
 // Hash function for a byte array.  For convenience, a 64-bit seed is also
 // hashed into the result.
-uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed);
+uint64_t CityHash64WithSeed(const char* buf, std::size_t len, uint64_t seed);
 
 // Hash function for a byte array.  For convenience, two seeds are also
 // hashed into the result.
-uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1);
+uint64_t CityHash64WithSeeds(const char* buf, std::size_t len, uint64_t seed0, uint64_t seed1);
 
 // Hash function for a byte array.
-uint128 CityHash128(const char* s, size_t len);
+uint128 CityHash128(const char* s, std::size_t len);
 
 // Hash function for a byte array.  For convenience, a 128-bit seed is also
 // hashed into the result.
-uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed);
+uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed);
 
 // Hash 128 input bits down to 64 bits of output.
 // This is intended to be a reasonably good hash function.
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index baa721481..21a0b9738 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -76,7 +76,7 @@ namespace FileUtil {
 // Modifies argument.
 static void StripTailDirSlashes(std::string& fname) {
     if (fname.length() > 1) {
-        size_t i = fname.length();
+        std::size_t i = fname.length();
         while (i > 0 && fname[i - 1] == DIR_SEP_CHR)
             --i;
         fname.resize(i);
@@ -201,7 +201,7 @@ bool CreateFullPath(const std::string& fullPath) {
         return true;
     }
 
-    size_t position = 0;
+    std::size_t position = 0;
     while (true) {
         // Find next sub path
         position = fullPath.find(DIR_SEP_CHR, position);
@@ -299,7 +299,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
     std::array<char, 1024> buffer;
     while (!feof(input.get())) {
         // read input
-        size_t rnum = fread(buffer.data(), sizeof(char), buffer.size(), input.get());
+        std::size_t rnum = fread(buffer.data(), sizeof(char), buffer.size(), input.get());
         if (rnum != buffer.size()) {
             if (ferror(input.get()) != 0) {
                 LOG_ERROR(Common_Filesystem, "failed reading from source, {} --> {}: {}",
@@ -309,7 +309,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
         }
 
         // write output
-        size_t wnum = fwrite(buffer.data(), sizeof(char), rnum, output.get());
+        std::size_t wnum = fwrite(buffer.data(), sizeof(char), rnum, output.get());
         if (wnum != rnum) {
             LOG_ERROR(Common_Filesystem, "failed writing to output, {} --> {}: {}", srcFilename,
                       destFilename, GetLastErrorMsg());
@@ -756,11 +756,11 @@ std::string GetNANDRegistrationDir(bool system) {
     return GetUserPath(UserPath::NANDDir) + "user/Contents/registered/";
 }
 
-size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename) {
+std::size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename) {
     return FileUtil::IOFile(filename, text_file ? "w" : "wb").WriteBytes(str.data(), str.size());
 }
 
-size_t ReadFileToString(bool text_file, const char* filename, std::string& str) {
+std::size_t ReadFileToString(bool text_file, const char* filename, std::string& str) {
     IOFile file(filename, text_file ? "r" : "rb");
 
     if (!file.IsOpen())
@@ -829,7 +829,7 @@ std::vector<std::string> SplitPathComponents(std::string_view filename) {
 std::string_view GetParentPath(std::string_view path) {
     const auto name_bck_index = path.rfind('\\');
     const auto name_fwd_index = path.rfind('/');
-    size_t name_index;
+    std::size_t name_index;
 
     if (name_bck_index == std::string_view::npos || name_fwd_index == std::string_view::npos) {
         name_index = std::min(name_bck_index, name_fwd_index);
@@ -868,7 +868,7 @@ std::string_view GetFilename(std::string_view path) {
 }
 
 std::string_view GetExtensionFromFilename(std::string_view name) {
-    const size_t index = name.rfind('.');
+    const std::size_t index = name.rfind('.');
 
     if (index == std::string_view::npos) {
         return {};
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 2f13d0b6b..24c1e413c 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -143,8 +143,9 @@ const std::string& GetExeDirectory();
 std::string AppDataRoamingDirectory();
 #endif
 
-size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename);
-size_t ReadFileToString(bool text_file, const char* filename, std::string& str);
+std::size_t WriteStringToFile(bool text_file, const std::string& str, const char* filename);
+
+std::size_t ReadFileToString(bool text_file, const char* filename, std::string& str);
 
 /**
  * Splits the filename into 8.3 format
@@ -177,10 +178,10 @@ std::string_view RemoveTrailingSlash(std::string_view path);
 
 // Creates a new vector containing indices [first, last) from the original.
 template <typename T>
-std::vector<T> SliceVector(const std::vector<T>& vector, size_t first, size_t last) {
+std::vector<T> SliceVector(const std::vector<T>& vector, std::size_t first, std::size_t last) {
     if (first >= last)
         return {};
-    last = std::min<size_t>(last, vector.size());
+    last = std::min<std::size_t>(last, vector.size());
     return std::vector<T>(vector.begin() + first, vector.begin() + first + last);
 }
 
@@ -213,47 +214,47 @@ public:
     bool Close();
 
     template <typename T>
-    size_t ReadArray(T* data, size_t length) const {
+    std::size_t ReadArray(T* data, std::size_t length) const {
         static_assert(std::is_trivially_copyable_v<T>,
                       "Given array does not consist of trivially copyable objects");
 
         if (!IsOpen()) {
-            return std::numeric_limits<size_t>::max();
+            return std::numeric_limits<std::size_t>::max();
         }
 
         return std::fread(data, sizeof(T), length, m_file);
     }
 
     template <typename T>
-    size_t WriteArray(const T* data, size_t length) {
+    std::size_t WriteArray(const T* data, std::size_t length) {
         static_assert(std::is_trivially_copyable_v<T>,
                       "Given array does not consist of trivially copyable objects");
         if (!IsOpen()) {
-            return std::numeric_limits<size_t>::max();
+            return std::numeric_limits<std::size_t>::max();
         }
 
         return std::fwrite(data, sizeof(T), length, m_file);
     }
 
     template <typename T>
-    size_t ReadBytes(T* data, size_t length) const {
+    std::size_t ReadBytes(T* data, std::size_t length) const {
         static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
         return ReadArray(reinterpret_cast<char*>(data), length);
     }
 
     template <typename T>
-    size_t WriteBytes(const T* data, size_t length) {
+    std::size_t WriteBytes(const T* data, std::size_t length) {
         static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
         return WriteArray(reinterpret_cast<const char*>(data), length);
     }
 
     template <typename T>
-    size_t WriteObject(const T& object) {
+    std::size_t WriteObject(const T& object) {
         static_assert(!std::is_pointer_v<T>, "WriteObject arguments must not be a pointer");
         return WriteArray(&object, 1);
     }
 
-    size_t WriteString(const std::string& str) {
+    std::size_t WriteString(const std::string& str) {
         return WriteArray(str.c_str(), str.length());
     }
 
diff --git a/src/common/hash.h b/src/common/hash.h
index 2c761e545..40194d1ee 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -17,7 +17,7 @@ namespace Common {
  * @param len Length of data (in bytes) to compute hash over
  * @returns 64-bit hash value that was computed over the data block
  */
-static inline u64 ComputeHash64(const void* data, size_t len) {
+static inline u64 ComputeHash64(const void* data, std::size_t len) {
     return CityHash64(static_cast<const char*>(data), len);
 }
 
@@ -63,7 +63,7 @@ struct HashableStruct {
         return !(*this == o);
     };
 
-    size_t Hash() const {
+    std::size_t Hash() const {
         return Common::ComputeStructHash64(state);
     }
 };
diff --git a/src/common/hex_util.cpp b/src/common/hex_util.cpp
index 8e0a9e46f..589ae5cbf 100644
--- a/src/common/hex_util.cpp
+++ b/src/common/hex_util.cpp
@@ -18,7 +18,7 @@ u8 ToHexNibble(char c1) {
     return 0;
 }
 
-std::array<u8, 16> operator""_array16(const char* str, size_t len) {
+std::array<u8, 16> operator""_array16(const char* str, std::size_t len) {
     if (len != 32) {
         LOG_ERROR(Common,
                   "Attempting to parse string to array that is not of correct size (expected=32, "
@@ -29,7 +29,7 @@ std::array<u8, 16> operator""_array16(const char* str, size_t len) {
     return HexStringToArray<16>(str);
 }
 
-std::array<u8, 32> operator""_array32(const char* str, size_t len) {
+std::array<u8, 32> operator""_array32(const char* str, std::size_t len) {
     if (len != 64) {
         LOG_ERROR(Common,
                   "Attempting to parse string to array that is not of correct size (expected=64, "
diff --git a/src/common/hex_util.h b/src/common/hex_util.h
index 5fb79bb72..863a5ccd9 100644
--- a/src/common/hex_util.h
+++ b/src/common/hex_util.h
@@ -14,20 +14,20 @@ namespace Common {
 
 u8 ToHexNibble(char c1);
 
-template <size_t Size, bool le = false>
+template <std::size_t Size, bool le = false>
 std::array<u8, Size> HexStringToArray(std::string_view str) {
     std::array<u8, Size> out{};
     if constexpr (le) {
-        for (size_t i = 2 * Size - 2; i <= 2 * Size; i -= 2)
+        for (std::size_t i = 2 * Size - 2; i <= 2 * Size; i -= 2)
             out[i / 2] = (ToHexNibble(str[i]) << 4) | ToHexNibble(str[i + 1]);
     } else {
-        for (size_t i = 0; i < 2 * Size; i += 2)
+        for (std::size_t i = 0; i < 2 * Size; i += 2)
             out[i / 2] = (ToHexNibble(str[i]) << 4) | ToHexNibble(str[i + 1]);
     }
     return out;
 }
 
-template <size_t Size>
+template <std::size_t Size>
 std::string HexArrayToString(std::array<u8, Size> array, bool upper = true) {
     std::string out;
     for (u8 c : array)
@@ -35,7 +35,7 @@ std::string HexArrayToString(std::array<u8, Size> array, bool upper = true) {
     return out;
 }
 
-std::array<u8, 0x10> operator"" _array16(const char* str, size_t len);
-std::array<u8, 0x20> operator"" _array32(const char* str, size_t len);
+std::array<u8, 0x10> operator"" _array16(const char* str, std::size_t len);
+std::array<u8, 0x20> operator"" _array32(const char* str, std::size_t len);
 
 } // namespace Common
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 1323f8d0f..efd776db6 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -135,7 +135,7 @@ FileBackend::FileBackend(const std::string& filename)
 void FileBackend::Write(const Entry& entry) {
     // prevent logs from going over the maximum size (in case its spamming and the user doesn't
     // know)
-    constexpr size_t MAX_BYTES_WRITTEN = 50 * 1024L * 1024L;
+    constexpr std::size_t MAX_BYTES_WRITTEN = 50 * 1024L * 1024L;
     if (!file.IsOpen() || bytes_written > MAX_BYTES_WRITTEN) {
         return;
     }
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index b3f4b9cef..11edbf1b6 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -100,7 +100,7 @@ public:
 
 private:
     FileUtil::IOFile file;
-    size_t bytes_written;
+    std::size_t bytes_written;
 };
 
 void AddBackend(std::unique_ptr<Backend> backend);
diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp
index 2dd331152..2eccbcd8d 100644
--- a/src/common/logging/filter.cpp
+++ b/src/common/logging/filter.cpp
@@ -71,7 +71,7 @@ void Filter::ResetAll(Level level) {
 }
 
 void Filter::SetClassLevel(Class log_class, Level level) {
-    class_levels[static_cast<size_t>(log_class)] = level;
+    class_levels[static_cast<std::size_t>(log_class)] = level;
 }
 
 void Filter::ParseFilterString(std::string_view filter_view) {
@@ -93,7 +93,8 @@ void Filter::ParseFilterString(std::string_view filter_view) {
 }
 
 bool Filter::CheckMessage(Class log_class, Level level) const {
-    return static_cast<u8>(level) >= static_cast<u8>(class_levels[static_cast<size_t>(log_class)]);
+    return static_cast<u8>(level) >=
+           static_cast<u8>(class_levels[static_cast<std::size_t>(log_class)]);
 }
 
 bool Filter::IsDebug() const {
diff --git a/src/common/logging/filter.h b/src/common/logging/filter.h
index f7e3b87c9..773df6f2c 100644
--- a/src/common/logging/filter.h
+++ b/src/common/logging/filter.h
@@ -49,6 +49,6 @@ public:
     bool IsDebug() const;
 
 private:
-    std::array<Level, static_cast<size_t>(Class::Count)> class_levels;
+    std::array<Level, static_cast<std::size_t>(Class::Count)> class_levels;
 };
 } // namespace Log
diff --git a/src/common/memory_util.cpp b/src/common/memory_util.cpp
index 09462ccee..9736fb12a 100644
--- a/src/common/memory_util.cpp
+++ b/src/common/memory_util.cpp
@@ -25,7 +25,7 @@
 // This is purposely not a full wrapper for virtualalloc/mmap, but it
 // provides exactly the primitive operations that Dolphin needs.
 
-void* AllocateExecutableMemory(size_t size, bool low) {
+void* AllocateExecutableMemory(std::size_t size, bool low) {
 #if defined(_WIN32)
     void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
 #else
@@ -74,7 +74,7 @@ void* AllocateExecutableMemory(size_t size, bool low) {
     return ptr;
 }
 
-void* AllocateMemoryPages(size_t size) {
+void* AllocateMemoryPages(std::size_t size) {
 #ifdef _WIN32
     void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
 #else
@@ -90,7 +90,7 @@ void* AllocateMemoryPages(size_t size) {
     return ptr;
 }
 
-void* AllocateAlignedMemory(size_t size, size_t alignment) {
+void* AllocateAlignedMemory(std::size_t size, std::size_t alignment) {
 #ifdef _WIN32
     void* ptr = _aligned_malloc(size, alignment);
 #else
@@ -109,7 +109,7 @@ void* AllocateAlignedMemory(size_t size, size_t alignment) {
     return ptr;
 }
 
-void FreeMemoryPages(void* ptr, size_t size) {
+void FreeMemoryPages(void* ptr, std::size_t size) {
     if (ptr) {
 #ifdef _WIN32
         if (!VirtualFree(ptr, 0, MEM_RELEASE))
@@ -130,7 +130,7 @@ void FreeAlignedMemory(void* ptr) {
     }
 }
 
-void WriteProtectMemory(void* ptr, size_t size, bool allowExecute) {
+void WriteProtectMemory(void* ptr, std::size_t size, bool allowExecute) {
 #ifdef _WIN32
     DWORD oldValue;
     if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
@@ -140,7 +140,7 @@ void WriteProtectMemory(void* ptr, size_t size, bool allowExecute) {
 #endif
 }
 
-void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute) {
+void UnWriteProtectMemory(void* ptr, std::size_t size, bool allowExecute) {
 #ifdef _WIN32
     DWORD oldValue;
     if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE,
diff --git a/src/common/memory_util.h b/src/common/memory_util.h
index 76ca5a30c..aad071979 100644
--- a/src/common/memory_util.h
+++ b/src/common/memory_util.h
@@ -7,13 +7,13 @@
 #include <cstddef>
 #include <string>
 
-void* AllocateExecutableMemory(size_t size, bool low = true);
-void* AllocateMemoryPages(size_t size);
-void FreeMemoryPages(void* ptr, size_t size);
-void* AllocateAlignedMemory(size_t size, size_t alignment);
+void* AllocateExecutableMemory(std::size_t size, bool low = true);
+void* AllocateMemoryPages(std::size_t size);
+void FreeMemoryPages(void* ptr, std::size_t size);
+void* AllocateAlignedMemory(std::size_t size, std::size_t alignment);
 void FreeAlignedMemory(void* ptr);
-void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
-void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
+void WriteProtectMemory(void* ptr, std::size_t size, bool executable = false);
+void UnWriteProtectMemory(void* ptr, std::size_t size, bool allowExecute = false);
 std::string MemUsage();
 
 inline int GetPageSize() {
diff --git a/src/common/misc.cpp b/src/common/misc.cpp
index 3fa8a3bc4..68cb86cd1 100644
--- a/src/common/misc.cpp
+++ b/src/common/misc.cpp
@@ -16,7 +16,7 @@
 // Call directly after the command or use the error num.
 // This function might change the error code.
 std::string GetLastErrorMsg() {
-    static const size_t buff_size = 255;
+    static const std::size_t buff_size = 255;
     char err_str[buff_size];
 
 #ifdef _WIN32
diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h
new file mode 100644
index 000000000..45926c9ec
--- /dev/null
+++ b/src/common/ring_buffer.h
@@ -0,0 +1,111 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cstddef>
+#include <cstring>
+#include <type_traits>
+#include <vector>
+#include "common/common_types.h"
+
+namespace Common {
+
+/// SPSC ring buffer
+/// @tparam T            Element type
+/// @tparam capacity     Number of slots in ring buffer
+/// @tparam granularity  Slot size in terms of number of elements
+template <typename T, std::size_t capacity, std::size_t granularity = 1>
+class RingBuffer {
+    /// A "slot" is made of `granularity` elements of `T`.
+    static constexpr std::size_t slot_size = granularity * sizeof(T);
+    // T must be safely memcpy-able and have a trivial default constructor.
+    static_assert(std::is_trivial_v<T>);
+    // Ensure capacity is sensible.
+    static_assert(capacity < std::numeric_limits<std::size_t>::max() / 2 / granularity);
+    static_assert((capacity & (capacity - 1)) == 0, "capacity must be a power of two");
+    // Ensure lock-free.
+    static_assert(std::atomic<std::size_t>::is_always_lock_free);
+
+public:
+    /// Pushes slots into the ring buffer
+    /// @param new_slots   Pointer to the slots to push
+    /// @param slot_count  Number of slots to push
+    /// @returns The number of slots actually pushed
+    std::size_t Push(const void* new_slots, std::size_t slot_count) {
+        const std::size_t write_index = m_write_index.load();
+        const std::size_t slots_free = capacity + m_read_index.load() - write_index;
+        const std::size_t push_count = std::min(slot_count, slots_free);
+
+        const std::size_t pos = write_index % capacity;
+        const std::size_t first_copy = std::min(capacity - pos, push_count); // until array end
+        const std::size_t second_copy = push_count - first_copy; // wraps to index 0
+
+        const char* in = static_cast<const char*>(new_slots);
+        std::memcpy(m_data.data() + pos * granularity, in, first_copy * slot_size);
+        std::memcpy(m_data.data(), in + first_copy * slot_size, second_copy * slot_size);
+
+        m_write_index.store(write_index + push_count);
+
+        return push_count;
+    }
+
+    /// Pushes the whole slots held by `input`; a trailing partial slot is not pushed.
+    std::size_t Push(const std::vector<T>& input) {
+        return Push(input.data(), input.size() / granularity);
+    }
+
+    /// Pops slots from the ring buffer
+    /// @param output     Where to store the popped slots
+    /// @param max_slots  Maximum number of slots to pop
+    /// @returns The number of slots actually popped
+    std::size_t Pop(void* output, std::size_t max_slots = ~std::size_t(0)) {
+        const std::size_t read_index = m_read_index.load();
+        const std::size_t slots_filled = m_write_index.load() - read_index;
+        const std::size_t pop_count = std::min(slots_filled, max_slots);
+
+        const std::size_t pos = read_index % capacity;
+        const std::size_t first_copy = std::min(capacity - pos, pop_count); // until array end
+        const std::size_t second_copy = pop_count - first_copy; // wraps to index 0
+
+        char* out = static_cast<char*>(output);
+        std::memcpy(out, m_data.data() + pos * granularity, first_copy * slot_size);
+        std::memcpy(out + first_copy * slot_size, m_data.data(), second_copy * slot_size);
+
+        m_read_index.store(read_index + pop_count);
+
+        return pop_count;
+    }
+
+    /// Pops up to `max_slots` slots and returns their elements (`granularity` per slot).
+    std::vector<T> Pop(std::size_t max_slots = ~std::size_t(0)) {
+        std::vector<T> out(std::min(max_slots, capacity) * granularity);
+        const std::size_t count = Pop(out.data(), out.size() / granularity);
+        out.resize(count * granularity);
+        return out;
+    }
+
+    /// @returns Number of slots used
+    std::size_t Size() const {
+        return m_write_index.load() - m_read_index.load();
+    }
+
+    /// @returns Maximum size of ring buffer
+    constexpr std::size_t Capacity() const {
+        return capacity;
+    }
+
+private:
+    // It is important to align the below variables for performance reasons:
+    // Having them on the same cache-line would result in false-sharing between them.
+    alignas(128) std::atomic<std::size_t> m_read_index{0};
+    alignas(128) std::atomic<std::size_t> m_write_index{0};
+
+    std::array<T, granularity * capacity> m_data;
+};
+
+} // namespace Common
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 0ca663032..c9a5425a7 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -37,7 +37,7 @@ std::string ToUpper(std::string str) {
 }
 
 // For Debugging. Read out an u8 array.
-std::string ArrayToString(const u8* data, size_t size, int line_len, bool spaces) {
+std::string ArrayToString(const u8* data, std::size_t size, int line_len, bool spaces) {
     std::ostringstream oss;
     oss << std::setfill('0') << std::hex;
 
@@ -60,7 +60,7 @@ std::string StringFromBuffer(const std::vector<u8>& data) {
 
 // Turns "  hej " into "hej". Also handles tabs.
 std::string StripSpaces(const std::string& str) {
-    const size_t s = str.find_first_not_of(" \t\r\n");
+    const std::size_t s = str.find_first_not_of(" \t\r\n");
 
     if (str.npos != s)
         return str.substr(s, str.find_last_not_of(" \t\r\n") - s + 1);
@@ -121,10 +121,10 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
     if (full_path.empty())
         return false;
 
-    size_t dir_end = full_path.find_last_of("/"
+    std::size_t dir_end = full_path.find_last_of("/"
 // windows needs the : included for something like just "C:" to be considered a directory
 #ifdef _WIN32
-                                            "\\:"
+                                                 "\\:"
 #endif
     );
     if (std::string::npos == dir_end)
@@ -132,7 +132,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
     else
         dir_end += 1;
 
-    size_t fname_end = full_path.rfind('.');
+    std::size_t fname_end = full_path.rfind('.');
     if (fname_end < dir_end || std::string::npos == fname_end)
         fname_end = full_path.size();
 
@@ -172,7 +172,7 @@ void SplitString(const std::string& str, const char delim, std::vector<std::stri
 }
 
 std::string TabsToSpaces(int tab_size, std::string in) {
-    size_t i = 0;
+    std::size_t i = 0;
 
     while ((i = in.find('\t')) != std::string::npos) {
         in.replace(i, 1, tab_size, ' ');
@@ -182,7 +182,7 @@ std::string TabsToSpaces(int tab_size, std::string in) {
 }
 
 std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest) {
-    size_t pos = 0;
+    std::size_t pos = 0;
 
     if (src == dest)
         return result;
@@ -280,22 +280,22 @@ static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>&
         return {};
     }
 
-    const size_t in_bytes = sizeof(T) * input.size();
+    const std::size_t in_bytes = sizeof(T) * input.size();
     // Multiply by 4, which is the max number of bytes to encode a codepoint
-    const size_t out_buffer_size = 4 * in_bytes;
+    const std::size_t out_buffer_size = 4 * in_bytes;
 
     std::string out_buffer(out_buffer_size, '\0');
 
     auto src_buffer = &input[0];
-    size_t src_bytes = in_bytes;
+    std::size_t src_bytes = in_bytes;
     auto dst_buffer = &out_buffer[0];
-    size_t dst_bytes = out_buffer.size();
+    std::size_t dst_bytes = out_buffer.size();
 
     while (0 != src_bytes) {
-        size_t const iconv_result =
+        std::size_t const iconv_result =
             iconv(conv_desc, (char**)(&src_buffer), &src_bytes, &dst_buffer, &dst_bytes);
 
-        if (static_cast<size_t>(-1) == iconv_result) {
+        if (static_cast<std::size_t>(-1) == iconv_result) {
             if (EILSEQ == errno || EINVAL == errno) {
                 // Try to skip the bad character
                 if (0 != src_bytes) {
@@ -326,22 +326,22 @@ std::u16string UTF8ToUTF16(const std::string& input) {
         return {};
     }
 
-    const size_t in_bytes = sizeof(char) * input.size();
+    const std::size_t in_bytes = sizeof(char) * input.size();
     // Multiply by 4, which is the max number of bytes to encode a codepoint
-    const size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes;
+    const std::size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes;
 
     std::u16string out_buffer(out_buffer_size, char16_t{});
 
     char* src_buffer = const_cast<char*>(&input[0]);
-    size_t src_bytes = in_bytes;
+    std::size_t src_bytes = in_bytes;
     char* dst_buffer = (char*)(&out_buffer[0]);
-    size_t dst_bytes = out_buffer.size();
+    std::size_t dst_bytes = out_buffer.size();
 
     while (0 != src_bytes) {
-        size_t const iconv_result =
+        std::size_t const iconv_result =
             iconv(conv_desc, &src_buffer, &src_bytes, &dst_buffer, &dst_bytes);
 
-        if (static_cast<size_t>(-1) == iconv_result) {
+        if (static_cast<std::size_t>(-1) == iconv_result) {
             if (EILSEQ == errno || EINVAL == errno) {
                 // Try to skip the bad character
                 if (0 != src_bytes) {
@@ -381,8 +381,8 @@ std::string SHIFTJISToUTF8(const std::string& input) {
 
 #endif
 
-std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, size_t max_len) {
-    size_t len = 0;
+std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len) {
+    std::size_t len = 0;
     while (len < max_len && buffer[len] != '\0')
         ++len;
 
diff --git a/src/common/string_util.h b/src/common/string_util.h
index 4a2143b59..dcca6bc38 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -19,7 +19,7 @@ std::string ToLower(std::string str);
 /// Make a string uppercase
 std::string ToUpper(std::string str);
 
-std::string ArrayToString(const u8* data, size_t size, int line_len = 20, bool spaces = true);
+std::string ArrayToString(const u8* data, std::size_t size, int line_len = 20, bool spaces = true);
 
 std::string StringFromBuffer(const std::vector<u8>& data);
 
@@ -118,7 +118,7 @@ bool ComparePartialString(InIt begin, InIt end, const char* other) {
  * Creates a std::string from a fixed-size NUL-terminated char buffer. If the buffer isn't
  * NUL-terminated then the string ends at max_len characters.
  */
-std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, size_t max_len);
+std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len);
 
 /**
  * Attempts to trim an arbitrary prefix from `path`, leaving only the part starting at `root`. It's
diff --git a/src/common/thread.h b/src/common/thread.h
index 9465e1de7..12a1c095c 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -60,12 +60,12 @@ private:
 
 class Barrier {
 public:
-    explicit Barrier(size_t count_) : count(count_), waiting(0), generation(0) {}
+    explicit Barrier(std::size_t count_) : count(count_), waiting(0), generation(0) {}
 
     /// Blocks until all "count" threads have called Sync()
     void Sync() {
         std::unique_lock<std::mutex> lk(mutex);
-        const size_t current_generation = generation;
+        const std::size_t current_generation = generation;
 
         if (++waiting == count) {
             generation++;
@@ -80,9 +80,9 @@ public:
 private:
     std::condition_variable condvar;
     std::mutex mutex;
-    const size_t count;
-    size_t waiting;
-    size_t generation; // Incremented once each time the barrier is used
+    const std::size_t count;
+    std::size_t waiting;
+    std::size_t generation; // Incremented once each time the barrier is used
 };
 
 void SleepCurrentThread(int ms);
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 927da9187..636a5c0f9 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -97,7 +97,7 @@ const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
     Xbyak::util::xmm15,
 });
 
-constexpr size_t ABI_SHADOW_SPACE = 0x20;
+constexpr std::size_t ABI_SHADOW_SPACE = 0x20;
 
 #else
 
@@ -147,22 +147,23 @@ const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
     Xbyak::util::r15,
 });
 
-constexpr size_t ABI_SHADOW_SPACE = 0;
+constexpr std::size_t ABI_SHADOW_SPACE = 0;
 
 #endif
 
-inline void ABI_CalculateFrameSize(BitSet32 regs, size_t rsp_alignment, size_t needed_frame_size,
-                                   s32* out_subtraction, s32* out_xmm_offset) {
+inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment,
+                                   std::size_t needed_frame_size, s32* out_subtraction,
+                                   s32* out_xmm_offset) {
     int count = (regs & ABI_ALL_GPRS).Count();
     rsp_alignment -= count * 8;
-    size_t subtraction = 0;
+    std::size_t subtraction = 0;
     int xmm_count = (regs & ABI_ALL_XMMS).Count();
     if (xmm_count) {
         // If we have any XMMs to save, we must align the stack here.
         subtraction = rsp_alignment & 0xF;
     }
     subtraction += 0x10 * xmm_count;
-    size_t xmm_base_subtraction = subtraction;
+    std::size_t xmm_base_subtraction = subtraction;
     subtraction += needed_frame_size;
     subtraction += ABI_SHADOW_SPACE;
     // Final alignment.
@@ -173,8 +174,9 @@ inline void ABI_CalculateFrameSize(BitSet32 regs, size_t rsp_alignment, size_t n
     *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
 }
 
-inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
-                                              size_t rsp_alignment, size_t needed_frame_size = 0) {
+inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
+                                                   std::size_t rsp_alignment,
+                                                   std::size_t needed_frame_size = 0) {
     s32 subtraction, xmm_offset;
     ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
 
@@ -195,7 +197,8 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet
 }
 
 inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
-                                           size_t rsp_alignment, size_t needed_frame_size = 0) {
+                                           std::size_t rsp_alignment,
+                                           std::size_t needed_frame_size = 0) {
     s32 subtraction, xmm_offset;
     ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
 
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
index 02323a017..5cc8a8c76 100644
--- a/src/common/x64/xbyak_util.h
+++ b/src/common/x64/xbyak_util.h
@@ -34,7 +34,7 @@ inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
 template <typename T>
 inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
     static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
-    size_t addr = reinterpret_cast<size_t>(f);
+    std::size_t addr = reinterpret_cast<std::size_t>(f);
     if (IsWithin2G(code, addr)) {
         code.call(f);
     } else {
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index c368745b1..0b2af2a9b 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -31,11 +31,11 @@ public:
     virtual void Step() = 0;
 
     /// Maps a backing memory region for the CPU
-    virtual void MapBackingMemory(VAddr address, size_t size, u8* memory,
+    virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                                   Kernel::VMAPermission perms) = 0;
 
     /// Unmaps a region of memory that was previously mapped using MapBackingMemory
-    virtual void UnmapMemory(VAddr address, size_t size) = 0;
+    virtual void UnmapMemory(VAddr address, std::size_t size) = 0;
 
     /// Clear all instruction cache
     virtual void ClearInstructionCache() = 0;
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index b47f04988..0c175d872 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -58,7 +58,7 @@ public:
         Memory::Write64(vaddr + 8, value[1]);
     }
 
-    void InterpreterFallback(u64 pc, size_t num_instructions) override {
+    void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
         LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,
                  num_instructions, MemoryReadCode(pc));
 
@@ -81,7 +81,7 @@ public:
             return;
         default:
             ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})",
-                       static_cast<size_t>(exception), pc);
+                       static_cast<std::size_t>(exception), pc);
         }
     }
 
@@ -110,7 +110,7 @@ public:
     }
 
     ARM_Dynarmic& parent;
-    size_t num_interpreted_instructions = 0;
+    std::size_t num_interpreted_instructions = 0;
     u64 tpidrro_el0 = 0;
     u64 tpidr_el0 = 0;
 };
@@ -157,7 +157,8 @@ void ARM_Dynarmic::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic::ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor, size_t core_index)
+ARM_Dynarmic::ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
+                           std::size_t core_index)
     : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index},
       exclusive_monitor{std::dynamic_pointer_cast<DynarmicExclusiveMonitor>(exclusive_monitor)} {
     ThreadContext ctx;
@@ -168,12 +169,12 @@ ARM_Dynarmic::ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
 
 ARM_Dynarmic::~ARM_Dynarmic() = default;
 
-void ARM_Dynarmic::MapBackingMemory(u64 address, size_t size, u8* memory,
+void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory,
                                     Kernel::VMAPermission perms) {
     inner_unicorn.MapBackingMemory(address, size, memory, perms);
 }
 
-void ARM_Dynarmic::UnmapMemory(u64 address, size_t size) {
+void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) {
     inner_unicorn.UnmapMemory(address, size);
 }
 
@@ -269,10 +270,10 @@ void ARM_Dynarmic::PageTableChanged() {
     current_page_table = Memory::GetCurrentPageTable();
 }
 
-DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(size_t core_count) : monitor(core_count) {}
+DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
 DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
 
-void DynarmicExclusiveMonitor::SetExclusive(size_t core_index, VAddr addr) {
+void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) {
     // Size doesn't actually matter.
     monitor.Mark(core_index, addr, 16);
 }
@@ -281,27 +282,27 @@ void DynarmicExclusiveMonitor::ClearExclusive() {
     monitor.Clear();
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite8(size_t core_index, VAddr vaddr, u8 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 1,
                                         [&] { Memory::Write8(vaddr, value); });
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite16(size_t core_index, VAddr vaddr, u16 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 2,
                                         [&] { Memory::Write16(vaddr, value); });
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite32(size_t core_index, VAddr vaddr, u32 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 4,
                                         [&] { Memory::Write32(vaddr, value); });
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite64(size_t core_index, VAddr vaddr, u64 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 8,
                                         [&] { Memory::Write64(vaddr, value); });
 }
 
-bool DynarmicExclusiveMonitor::ExclusiveWrite128(size_t core_index, VAddr vaddr, u128 value) {
+bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
     return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] {
         Memory::Write64(vaddr, value[0]);
         Memory::Write64(vaddr, value[1]);
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 3bdfd8cd9..56c60c853 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -19,12 +19,12 @@ class DynarmicExclusiveMonitor;
 
 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor, size_t core_index);
+    ARM_Dynarmic(std::shared_ptr<ExclusiveMonitor> exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic();
 
-    void MapBackingMemory(VAddr address, size_t size, u8* memory,
+    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
-    void UnmapMemory(u64 address, size_t size) override;
+    void UnmapMemory(u64 address, std::size_t size) override;
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
     u64 GetReg(int index) const override;
@@ -59,7 +59,7 @@ private:
     std::unique_ptr<Dynarmic::A64::Jit> jit;
     ARM_Unicorn inner_unicorn;
 
-    size_t core_index;
+    std::size_t core_index;
     std::shared_ptr<DynarmicExclusiveMonitor> exclusive_monitor;
 
     Memory::PageTable* current_page_table = nullptr;
@@ -67,17 +67,17 @@ private:
 
 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
 public:
-    explicit DynarmicExclusiveMonitor(size_t core_count);
+    explicit DynarmicExclusiveMonitor(std::size_t core_count);
     ~DynarmicExclusiveMonitor();
 
-    void SetExclusive(size_t core_index, VAddr addr) override;
+    void SetExclusive(std::size_t core_index, VAddr addr) override;
     void ClearExclusive() override;
 
-    bool ExclusiveWrite8(size_t core_index, VAddr vaddr, u8 value) override;
-    bool ExclusiveWrite16(size_t core_index, VAddr vaddr, u16 value) override;
-    bool ExclusiveWrite32(size_t core_index, VAddr vaddr, u32 value) override;
-    bool ExclusiveWrite64(size_t core_index, VAddr vaddr, u64 value) override;
-    bool ExclusiveWrite128(size_t core_index, VAddr vaddr, u128 value) override;
+    bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
+    bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
+    bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
+    bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
+    bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;
 
 private:
     friend class ARM_Dynarmic;
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index 6f9b51573..f59aca667 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -12,14 +12,14 @@ class ExclusiveMonitor {
 public:
     virtual ~ExclusiveMonitor();
 
-    virtual void SetExclusive(size_t core_index, VAddr addr) = 0;
+    virtual void SetExclusive(std::size_t core_index, VAddr addr) = 0;
     virtual void ClearExclusive() = 0;
 
-    virtual bool ExclusiveWrite8(size_t core_index, VAddr vaddr, u8 value) = 0;
-    virtual bool ExclusiveWrite16(size_t core_index, VAddr vaddr, u16 value) = 0;
-    virtual bool ExclusiveWrite32(size_t core_index, VAddr vaddr, u32 value) = 0;
-    virtual bool ExclusiveWrite64(size_t core_index, VAddr vaddr, u64 value) = 0;
-    virtual bool ExclusiveWrite128(size_t core_index, VAddr vaddr, u128 value) = 0;
+    virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
+    virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
+    virtual bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) = 0;
+    virtual bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) = 0;
+    virtual bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) = 0;
 };
 
 } // namespace Core
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 4c4de2623..4e02b7cd4 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -90,12 +90,12 @@ ARM_Unicorn::~ARM_Unicorn() {
     CHECKED(uc_close(uc));
 }
 
-void ARM_Unicorn::MapBackingMemory(VAddr address, size_t size, u8* memory,
+void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                                    Kernel::VMAPermission perms) {
     CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory));
 }
 
-void ARM_Unicorn::UnmapMemory(VAddr address, size_t size) {
+void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) {
     CHECKED(uc_mem_unmap(uc, address, size));
 }
 
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index bd6b2f723..d6f7cf4ab 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -15,9 +15,9 @@ class ARM_Unicorn final : public ARM_Interface {
 public:
     ARM_Unicorn();
     ~ARM_Unicorn();
-    void MapBackingMemory(VAddr address, size_t size, u8* memory,
+    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
                           Kernel::VMAPermission perms) override;
-    void UnmapMemory(VAddr address, size_t size) override;
+    void UnmapMemory(VAddr address, std::size_t size) override;
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
     u64 GetReg(int index) const override;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 713ee17c1..50f0a42fb 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -140,7 +140,7 @@ struct System::Impl {
 
         cpu_barrier = std::make_shared<CpuBarrier>();
         cpu_exclusive_monitor = Cpu::MakeExclusiveMonitor(cpu_cores.size());
-        for (size_t index = 0; index < cpu_cores.size(); ++index) {
+        for (std::size_t index = 0; index < cpu_cores.size(); ++index) {
             cpu_cores[index] = std::make_shared<Cpu>(cpu_exclusive_monitor, cpu_barrier, index);
         }
 
@@ -161,7 +161,7 @@ struct System::Impl {
         // CPU core 0 is run on the main thread
         thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
         if (Settings::values.use_multi_core) {
-            for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
+            for (std::size_t index = 0; index < cpu_core_threads.size(); ++index) {
                 cpu_core_threads[index] =
                     std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
                 thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
@@ -285,7 +285,7 @@ struct System::Impl {
     std::shared_ptr<CpuBarrier> cpu_barrier;
     std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
     std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
-    size_t active_core{}; ///< Active core, only used in single thread mode
+    std::size_t active_core{}; ///< Active core, only used in single thread mode
 
     /// Service manager
     std::shared_ptr<Service::SM::ServiceManager> service_manager;
@@ -348,7 +348,7 @@ ARM_Interface& System::CurrentArmInterface() {
     return CurrentCpuCore().ArmInterface();
 }
 
-size_t System::CurrentCoreIndex() {
+std::size_t System::CurrentCoreIndex() {
     return CurrentCpuCore().CoreIndex();
 }
 
@@ -356,7 +356,7 @@ Kernel::Scheduler& System::CurrentScheduler() {
     return *CurrentCpuCore().Scheduler();
 }
 
-const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(size_t core_index) {
+const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(std::size_t core_index) {
     ASSERT(core_index < NUM_CPU_CORES);
     return impl->cpu_cores[core_index]->Scheduler();
 }
@@ -369,12 +369,12 @@ const Kernel::SharedPtr<Kernel::Process>& System::CurrentProcess() const {
     return impl->kernel.CurrentProcess();
 }
 
-ARM_Interface& System::ArmInterface(size_t core_index) {
+ARM_Interface& System::ArmInterface(std::size_t core_index) {
     ASSERT(core_index < NUM_CPU_CORES);
     return impl->cpu_cores[core_index]->ArmInterface();
 }
 
-Cpu& System::CpuCore(size_t core_index) {
+Cpu& System::CpuCore(std::size_t core_index) {
     ASSERT(core_index < NUM_CPU_CORES);
     return *impl->cpu_cores[core_index];
 }
diff --git a/src/core/core.h b/src/core/core.h
index ab3663427..f9a3e97e3 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -145,16 +145,16 @@ public:
     ARM_Interface& CurrentArmInterface();
 
     /// Gets the index of the currently running CPU core
-    size_t CurrentCoreIndex();
+    std::size_t CurrentCoreIndex();
 
     /// Gets the scheduler for the CPU core that is currently running
     Kernel::Scheduler& CurrentScheduler();
 
     /// Gets an ARM interface to the CPU core with the specified index
-    ARM_Interface& ArmInterface(size_t core_index);
+    ARM_Interface& ArmInterface(std::size_t core_index);
 
     /// Gets a CPU interface to the CPU core with the specified index
-    Cpu& CpuCore(size_t core_index);
+    Cpu& CpuCore(std::size_t core_index);
 
     /// Gets the exclusive monitor
     ExclusiveMonitor& Monitor();
@@ -172,7 +172,7 @@ public:
     const VideoCore::RendererBase& Renderer() const;
 
     /// Gets the scheduler for the CPU core with the specified index
-    const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index);
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler(std::size_t core_index);
 
     /// Provides a reference to the current process
     Kernel::SharedPtr<Kernel::Process>& CurrentProcess();
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index b042ee02b..15d60cc8a 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -49,7 +49,7 @@ bool CpuBarrier::Rendezvous() {
 }
 
 Cpu::Cpu(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
-         std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
+         std::shared_ptr<CpuBarrier> cpu_barrier, std::size_t core_index)
     : cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} {
 
     if (Settings::values.use_cpu_jit) {
@@ -66,7 +66,7 @@ Cpu::Cpu(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
     scheduler = std::make_shared<Kernel::Scheduler>(arm_interface.get());
 }
 
-std::shared_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(size_t num_cores) {
+std::shared_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) {
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
         return std::make_shared<DynarmicExclusiveMonitor>(num_cores);
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 40ed34b47..1d229b42f 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -42,7 +42,7 @@ private:
 class Cpu {
 public:
     Cpu(std::shared_ptr<ExclusiveMonitor> exclusive_monitor,
-        std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index);
+        std::shared_ptr<CpuBarrier> cpu_barrier, std::size_t core_index);
 
     void RunLoop(bool tight_loop = true);
 
@@ -66,11 +66,11 @@ public:
         return core_index == 0;
     }
 
-    size_t CoreIndex() const {
+    std::size_t CoreIndex() const {
         return core_index;
     }
 
-    static std::shared_ptr<ExclusiveMonitor> MakeExclusiveMonitor(size_t num_cores);
+    static std::shared_ptr<ExclusiveMonitor> MakeExclusiveMonitor(std::size_t num_cores);
 
 private:
     void Reschedule();
@@ -80,7 +80,7 @@ private:
     std::shared_ptr<Kernel::Scheduler> scheduler;
 
     std::atomic<bool> reschedule_pending = false;
-    size_t core_index;
+    std::size_t core_index;
 };
 
 } // namespace Core
diff --git a/src/core/crypto/aes_util.cpp b/src/core/crypto/aes_util.cpp
index 89ade5000..4be76bb43 100644
--- a/src/core/crypto/aes_util.cpp
+++ b/src/core/crypto/aes_util.cpp
@@ -10,9 +10,9 @@
 
 namespace Core::Crypto {
 namespace {
-std::vector<u8> CalculateNintendoTweak(size_t sector_id) {
+std::vector<u8> CalculateNintendoTweak(std::size_t sector_id) {
     std::vector<u8> out(0x10);
-    for (size_t i = 0xF; i <= 0xF; --i) {
+    for (std::size_t i = 0xF; i <= 0xF; --i) {
         out[i] = sector_id & 0xFF;
         sector_id >>= 8;
     }
@@ -20,11 +20,14 @@ std::vector<u8> CalculateNintendoTweak(size_t sector_id) {
 }
 } // Anonymous namespace
 
-static_assert(static_cast<size_t>(Mode::CTR) == static_cast<size_t>(MBEDTLS_CIPHER_AES_128_CTR),
+static_assert(static_cast<std::size_t>(Mode::CTR) ==
+                  static_cast<std::size_t>(MBEDTLS_CIPHER_AES_128_CTR),
               "CTR has incorrect value.");
-static_assert(static_cast<size_t>(Mode::ECB) == static_cast<size_t>(MBEDTLS_CIPHER_AES_128_ECB),
+static_assert(static_cast<std::size_t>(Mode::ECB) ==
+                  static_cast<std::size_t>(MBEDTLS_CIPHER_AES_128_ECB),
               "ECB has incorrect value.");
-static_assert(static_cast<size_t>(Mode::XTS) == static_cast<size_t>(MBEDTLS_CIPHER_AES_128_XTS),
+static_assert(static_cast<std::size_t>(Mode::XTS) ==
+                  static_cast<std::size_t>(MBEDTLS_CIPHER_AES_128_XTS),
               "XTS has incorrect value.");
 
 // Structure to hide mbedtls types from header file
@@ -33,7 +36,7 @@ struct CipherContext {
     mbedtls_cipher_context_t decryption_context;
 };
 
-template <typename Key, size_t KeySize>
+template <typename Key, std::size_t KeySize>
 Crypto::AESCipher<Key, KeySize>::AESCipher(Key key, Mode mode)
     : ctx(std::make_unique<CipherContext>()) {
     mbedtls_cipher_init(&ctx->encryption_context);
@@ -54,26 +57,26 @@ Crypto::AESCipher<Key, KeySize>::AESCipher(Key key, Mode mode)
     //"Failed to set key on mbedtls ciphers.");
 }
 
-template <typename Key, size_t KeySize>
+template <typename Key, std::size_t KeySize>
 AESCipher<Key, KeySize>::~AESCipher() {
     mbedtls_cipher_free(&ctx->encryption_context);
     mbedtls_cipher_free(&ctx->decryption_context);
 }
 
-template <typename Key, size_t KeySize>
+template <typename Key, std::size_t KeySize>
 void AESCipher<Key, KeySize>::SetIV(std::vector<u8> iv) {
     ASSERT_MSG((mbedtls_cipher_set_iv(&ctx->encryption_context, iv.data(), iv.size()) ||
                 mbedtls_cipher_set_iv(&ctx->decryption_context, iv.data(), iv.size())) == 0,
                "Failed to set IV on mbedtls ciphers.");
 }
 
-template <typename Key, size_t KeySize>
-void AESCipher<Key, KeySize>::Transcode(const u8* src, size_t size, u8* dest, Op op) const {
+template <typename Key, std::size_t KeySize>
+void AESCipher<Key, KeySize>::Transcode(const u8* src, std::size_t size, u8* dest, Op op) const {
     auto* const context = op == Op::Encrypt ? &ctx->encryption_context : &ctx->decryption_context;
 
     mbedtls_cipher_reset(context);
 
-    size_t written = 0;
+    std::size_t written = 0;
     if (mbedtls_cipher_get_cipher_mode(context) == MBEDTLS_MODE_XTS) {
         mbedtls_cipher_update(context, src, size, dest, &written);
         if (written != size) {
@@ -90,8 +93,8 @@ void AESCipher<Key, KeySize>::Transcode(const u8* src, size_t size, u8* dest, Op
             return;
         }
 
-        for (size_t offset = 0; offset < size; offset += block_size) {
-            auto length = std::min<size_t>(block_size, size - offset);
+        for (std::size_t offset = 0; offset < size; offset += block_size) {
+            auto length = std::min<std::size_t>(block_size, size - offset);
             mbedtls_cipher_update(context, src + offset, length, dest + offset, &written);
             if (written != length) {
                 if (length < block_size) {
@@ -110,12 +113,12 @@ void AESCipher<Key, KeySize>::Transcode(const u8* src, size_t size, u8* dest, Op
     mbedtls_cipher_finish(context, nullptr, nullptr);
 }
 
-template <typename Key, size_t KeySize>
-void AESCipher<Key, KeySize>::XTSTranscode(const u8* src, size_t size, u8* dest, size_t sector_id,
-                                           size_t sector_size, Op op) {
+template <typename Key, std::size_t KeySize>
+void AESCipher<Key, KeySize>::XTSTranscode(const u8* src, std::size_t size, u8* dest,
+                                           std::size_t sector_id, std::size_t sector_size, Op op) {
     ASSERT_MSG(size % sector_size == 0, "XTS decryption size must be a multiple of sector size.");
 
-    for (size_t i = 0; i < size; i += sector_size) {
+    for (std::size_t i = 0; i < size; i += sector_size) {
         SetIV(CalculateNintendoTweak(sector_id++));
         Transcode<u8, u8>(src + i, sector_size, dest + i, op);
     }
diff --git a/src/core/crypto/aes_util.h b/src/core/crypto/aes_util.h
index 8ce9d6612..edc4ab910 100644
--- a/src/core/crypto/aes_util.h
+++ b/src/core/crypto/aes_util.h
@@ -25,7 +25,7 @@ enum class Op {
     Decrypt,
 };
 
-template <typename Key, size_t KeySize = sizeof(Key)>
+template <typename Key, std::size_t KeySize = sizeof(Key)>
 class AESCipher {
     static_assert(std::is_same_v<Key, std::array<u8, KeySize>>, "Key must be std::array of u8.");
     static_assert(KeySize == 0x10 || KeySize == 0x20, "KeySize must be 128 or 256.");
@@ -38,25 +38,25 @@ public:
     void SetIV(std::vector<u8> iv);
 
     template <typename Source, typename Dest>
-    void Transcode(const Source* src, size_t size, Dest* dest, Op op) const {
+    void Transcode(const Source* src, std::size_t size, Dest* dest, Op op) const {
         static_assert(std::is_trivially_copyable_v<Source> && std::is_trivially_copyable_v<Dest>,
                       "Transcode source and destination types must be trivially copyable.");
         Transcode(reinterpret_cast<const u8*>(src), size, reinterpret_cast<u8*>(dest), op);
     }
 
-    void Transcode(const u8* src, size_t size, u8* dest, Op op) const;
+    void Transcode(const u8* src, std::size_t size, u8* dest, Op op) const;
 
     template <typename Source, typename Dest>
-    void XTSTranscode(const Source* src, size_t size, Dest* dest, size_t sector_id,
-                      size_t sector_size, Op op) {
+    void XTSTranscode(const Source* src, std::size_t size, Dest* dest, std::size_t sector_id,
+                      std::size_t sector_size, Op op) {
         static_assert(std::is_trivially_copyable_v<Source> && std::is_trivially_copyable_v<Dest>,
                       "XTSTranscode source and destination types must be trivially copyable.");
         XTSTranscode(reinterpret_cast<const u8*>(src), size, reinterpret_cast<u8*>(dest), sector_id,
                      sector_size, op);
     }
 
-    void XTSTranscode(const u8* src, size_t size, u8* dest, size_t sector_id, size_t sector_size,
-                      Op op);
+    void XTSTranscode(const u8* src, std::size_t size, u8* dest, std::size_t sector_id,
+                      std::size_t sector_size, Op op);
 
 private:
     std::unique_ptr<CipherContext> ctx;
diff --git a/src/core/crypto/ctr_encryption_layer.cpp b/src/core/crypto/ctr_encryption_layer.cpp
index 296fad419..902841c77 100644
--- a/src/core/crypto/ctr_encryption_layer.cpp
+++ b/src/core/crypto/ctr_encryption_layer.cpp
@@ -8,11 +8,12 @@
 
 namespace Core::Crypto {
 
-CTREncryptionLayer::CTREncryptionLayer(FileSys::VirtualFile base_, Key128 key_, size_t base_offset)
+CTREncryptionLayer::CTREncryptionLayer(FileSys::VirtualFile base_, Key128 key_,
+                                       std::size_t base_offset)
     : EncryptionLayer(std::move(base_)), base_offset(base_offset), cipher(key_, Mode::CTR),
       iv(16, 0) {}
 
-size_t CTREncryptionLayer::Read(u8* data, size_t length, size_t offset) const {
+std::size_t CTREncryptionLayer::Read(u8* data, std::size_t length, std::size_t offset) const {
     if (length == 0)
         return 0;
 
@@ -28,7 +29,7 @@ size_t CTREncryptionLayer::Read(u8* data, size_t length, size_t offset) const {
     std::vector<u8> block = base->ReadBytes(0x10, offset - sector_offset);
     UpdateIV(base_offset + offset - sector_offset);
     cipher.Transcode(block.data(), block.size(), block.data(), Op::Decrypt);
-    size_t read = 0x10 - sector_offset;
+    std::size_t read = 0x10 - sector_offset;
 
     if (length + sector_offset < 0x10) {
         std::memcpy(data, block.data() + sector_offset, std::min<u64>(length, read));
@@ -43,9 +44,9 @@ void CTREncryptionLayer::SetIV(const std::vector<u8>& iv_) {
     iv.assign(iv_.cbegin(), iv_.cbegin() + length);
 }
 
-void CTREncryptionLayer::UpdateIV(size_t offset) const {
+void CTREncryptionLayer::UpdateIV(std::size_t offset) const {
     offset >>= 4;
-    for (size_t i = 0; i < 8; ++i) {
+    for (std::size_t i = 0; i < 8; ++i) {
         iv[16 - i - 1] = offset & 0xFF;
         offset >>= 8;
     }
diff --git a/src/core/crypto/ctr_encryption_layer.h b/src/core/crypto/ctr_encryption_layer.h
index 11b8683c7..a7bf810f4 100644
--- a/src/core/crypto/ctr_encryption_layer.h
+++ b/src/core/crypto/ctr_encryption_layer.h
@@ -14,20 +14,20 @@ namespace Core::Crypto {
 // Sits on top of a VirtualFile and provides CTR-mode AES decription.
 class CTREncryptionLayer : public EncryptionLayer {
 public:
-    CTREncryptionLayer(FileSys::VirtualFile base, Key128 key, size_t base_offset);
+    CTREncryptionLayer(FileSys::VirtualFile base, Key128 key, std::size_t base_offset);
 
-    size_t Read(u8* data, size_t length, size_t offset) const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
 
     void SetIV(const std::vector<u8>& iv);
 
 private:
-    size_t base_offset;
+    std::size_t base_offset;
 
     // Must be mutable as operations modify cipher contexts.
     mutable AESCipher<Key128> cipher;
     mutable std::vector<u8> iv;
 
-    void UpdateIV(size_t offset) const;
+    void UpdateIV(std::size_t offset) const;
 };
 
 } // namespace Core::Crypto
diff --git a/src/core/crypto/encryption_layer.cpp b/src/core/crypto/encryption_layer.cpp
index 4204527e3..4c377d7d4 100644
--- a/src/core/crypto/encryption_layer.cpp
+++ b/src/core/crypto/encryption_layer.cpp
@@ -12,11 +12,11 @@ std::string EncryptionLayer::GetName() const {
     return base->GetName();
 }
 
-size_t EncryptionLayer::GetSize() const {
+std::size_t EncryptionLayer::GetSize() const {
     return base->GetSize();
 }
 
-bool EncryptionLayer::Resize(size_t new_size) {
+bool EncryptionLayer::Resize(std::size_t new_size) {
     return false;
 }
 
@@ -32,7 +32,7 @@ bool EncryptionLayer::IsReadable() const {
     return true;
 }
 
-size_t EncryptionLayer::Write(const u8* data, size_t length, size_t offset) {
+std::size_t EncryptionLayer::Write(const u8* data, std::size_t length, std::size_t offset) {
     return 0;
 }
 
diff --git a/src/core/crypto/encryption_layer.h b/src/core/crypto/encryption_layer.h
index 7f05af9b4..53619cb38 100644
--- a/src/core/crypto/encryption_layer.h
+++ b/src/core/crypto/encryption_layer.h
@@ -15,15 +15,15 @@ class EncryptionLayer : public FileSys::VfsFile {
 public:
     explicit EncryptionLayer(FileSys::VirtualFile base);
 
-    size_t Read(u8* data, size_t length, size_t offset) const override = 0;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override = 0;
 
     std::string GetName() const override;
-    size_t GetSize() const override;
-    bool Resize(size_t new_size) override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
     std::shared_ptr<FileSys::VfsDirectory> GetContainingDirectory() const override;
     bool IsWritable() const override;
     bool IsReadable() const override;
-    size_t Write(const u8* data, size_t length, size_t offset) override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
     bool Rename(std::string_view name) override;
 
 protected:
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index 6f27f990b..bf3a70944 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -54,7 +54,7 @@ boost::optional<Key128> DeriveSDSeed() {
         return boost::none;
 
     std::array<u8, 0x10> buffer{};
-    size_t offset = 0;
+    std::size_t offset = 0;
     for (; offset + 0x10 < save_43.GetSize(); ++offset) {
         save_43.Seek(offset, SEEK_SET);
         save_43.ReadBytes(buffer.data(), buffer.size());
@@ -105,7 +105,7 @@ Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, const KeyManag
 
     // Combine sources and seed
     for (auto& source : sd_key_sources) {
-        for (size_t i = 0; i < source.size(); ++i)
+        for (std::size_t i = 0; i < source.size(); ++i)
             source[i] ^= sd_seed[i & 0xF];
     }
 
@@ -207,7 +207,7 @@ Key256 KeyManager::GetKey(S256KeyType id, u64 field1, u64 field2) const {
     return s256_keys.at({id, field1, field2});
 }
 
-template <size_t Size>
+template <std::size_t Size>
 void KeyManager::WriteKeyToFile(bool title_key, std::string_view keyname,
                                 const std::array<u8, Size>& key) {
     const std::string yuzu_keys_dir = FileUtil::GetUserPath(FileUtil::UserPath::KeysDir);
diff --git a/src/core/crypto/key_manager.h b/src/core/crypto/key_manager.h
index ce67913bb..978eec8dc 100644
--- a/src/core/crypto/key_manager.h
+++ b/src/core/crypto/key_manager.h
@@ -108,7 +108,7 @@ private:
     void LoadFromFile(const std::string& filename, bool is_title_keys);
     void AttemptLoadKeyFile(const std::string& dir1, const std::string& dir2,
                             const std::string& filename, bool title);
-    template <size_t Size>
+    template <std::size_t Size>
     void WriteKeyToFile(bool title_key, std::string_view keyname, const std::array<u8, Size>& key);
 
     static const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> s128_file_id;
diff --git a/src/core/crypto/xts_encryption_layer.cpp b/src/core/crypto/xts_encryption_layer.cpp
index c10832cfe..8f0ba4ee7 100644
--- a/src/core/crypto/xts_encryption_layer.cpp
+++ b/src/core/crypto/xts_encryption_layer.cpp
@@ -14,7 +14,7 @@ constexpr u64 XTS_SECTOR_SIZE = 0x4000;
 XTSEncryptionLayer::XTSEncryptionLayer(FileSys::VirtualFile base_, Key256 key_)
     : EncryptionLayer(std::move(base_)), cipher(key_, Mode::XTS) {}
 
-size_t XTSEncryptionLayer::Read(u8* data, size_t length, size_t offset) const {
+std::size_t XTSEncryptionLayer::Read(u8* data, std::size_t length, std::size_t offset) const {
     if (length == 0)
         return 0;
 
@@ -46,7 +46,7 @@ size_t XTSEncryptionLayer::Read(u8* data, size_t length, size_t offset) const {
         block.resize(XTS_SECTOR_SIZE);
     cipher.XTSTranscode(block.data(), block.size(), block.data(),
                         (offset - sector_offset) / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt);
-    const size_t read = XTS_SECTOR_SIZE - sector_offset;
+    const std::size_t read = XTS_SECTOR_SIZE - sector_offset;
 
     if (length + sector_offset < XTS_SECTOR_SIZE) {
         std::memcpy(data, block.data() + sector_offset, std::min<u64>(length, read));
diff --git a/src/core/crypto/xts_encryption_layer.h b/src/core/crypto/xts_encryption_layer.h
index 7a1f1dc64..5f8f00fe7 100644
--- a/src/core/crypto/xts_encryption_layer.h
+++ b/src/core/crypto/xts_encryption_layer.h
@@ -15,7 +15,7 @@ class XTSEncryptionLayer : public EncryptionLayer {
 public:
     XTSEncryptionLayer(FileSys::VirtualFile base, Key256 key);
 
-    size_t Read(u8* data, size_t length, size_t offset) const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
 
 private:
     // Must be mutable as operations modify cipher contexts.
diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp
index 8218893b2..edfc1bbd4 100644
--- a/src/core/file_sys/card_image.cpp
+++ b/src/core/file_sys/card_image.cpp
@@ -41,13 +41,14 @@ XCI::XCI(VirtualFile file_) : file(std::move(file_)), partitions(0x4) {
 
     for (XCIPartition partition :
          {XCIPartition::Update, XCIPartition::Normal, XCIPartition::Secure, XCIPartition::Logo}) {
-        auto raw = main_hfs.GetFile(partition_names[static_cast<size_t>(partition)]);
+        auto raw = main_hfs.GetFile(partition_names[static_cast<std::size_t>(partition)]);
         if (raw != nullptr)
-            partitions[static_cast<size_t>(partition)] = std::make_shared<PartitionFilesystem>(raw);
+            partitions[static_cast<std::size_t>(partition)] =
+                std::make_shared<PartitionFilesystem>(raw);
     }
 
     secure_partition = std::make_shared<NSP>(
-        main_hfs.GetFile(partition_names[static_cast<size_t>(XCIPartition::Secure)]));
+        main_hfs.GetFile(partition_names[static_cast<std::size_t>(XCIPartition::Secure)]));
 
     const auto secure_ncas = secure_partition->GetNCAsCollapsed();
     std::copy(secure_ncas.begin(), secure_ncas.end(), std::back_inserter(ncas));
@@ -92,7 +93,7 @@ Loader::ResultStatus XCI::GetProgramNCAStatus() const {
 }
 
 VirtualDir XCI::GetPartition(XCIPartition partition) const {
-    return partitions[static_cast<size_t>(partition)];
+    return partitions[static_cast<std::size_t>(partition)];
 }
 
 std::shared_ptr<NSP> XCI::GetSecurePartitionNSP() const {
@@ -168,11 +169,11 @@ bool XCI::ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) {
 }
 
 Loader::ResultStatus XCI::AddNCAFromPartition(XCIPartition part) {
-    if (partitions[static_cast<size_t>(part)] == nullptr) {
+    if (partitions[static_cast<std::size_t>(part)] == nullptr) {
         return Loader::ResultStatus::ErrorXCIMissingPartition;
     }
 
-    for (const VirtualFile& file : partitions[static_cast<size_t>(part)]->GetFiles()) {
+    for (const VirtualFile& file : partitions[static_cast<std::size_t>(part)]->GetFiles()) {
         if (file->GetExtension() != "nca")
             continue;
         auto nca = std::make_shared<NCA>(file);
@@ -187,7 +188,7 @@ Loader::ResultStatus XCI::AddNCAFromPartition(XCIPartition part) {
         } else {
             const u16 error_id = static_cast<u16>(nca->GetStatus());
             LOG_CRITICAL(Loader, "Could not load NCA {}/{}, failed with error code {:04X} ({})",
-                         partition_names[static_cast<size_t>(part)], nca->GetName(), error_id,
+                         partition_names[static_cast<std::size_t>(part)], nca->GetName(), error_id,
                          nca->GetStatus());
         }
     }
diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp
index 79bfb6fec..45fc0b574 100644
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -298,11 +298,11 @@ NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_off
         auto section = sections[i];
 
         if (section.raw.header.filesystem_type == NCASectionFilesystemType::ROMFS) {
-            const size_t base_offset =
+            const std::size_t base_offset =
                 header.section_tables[i].media_offset * MEDIA_OFFSET_MULTIPLIER;
             ivfc_offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
-            const size_t romfs_offset = base_offset + ivfc_offset;
-            const size_t romfs_size = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].size;
+            const std::size_t romfs_offset = base_offset + ivfc_offset;
+            const std::size_t romfs_size = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].size;
             auto raw = std::make_shared<OffsetVfsFile>(file, romfs_size, romfs_offset);
             auto dec = Decrypt(section, raw, romfs_offset);
 
diff --git a/src/core/file_sys/directory.h b/src/core/file_sys/directory.h
index 3759e743a..12bb90ec8 100644
--- a/src/core/file_sys/directory.h
+++ b/src/core/file_sys/directory.h
@@ -25,7 +25,7 @@ enum EntryType : u8 {
 struct Entry {
     Entry(std::string_view view, EntryType entry_type, u64 entry_size)
         : type{entry_type}, file_size{entry_size} {
-        const size_t copy_size = view.copy(filename, std::size(filename) - 1);
+        const std::size_t copy_size = view.copy(filename, std::size(filename) - 1);
         filename[copy_size] = '\0';
     }
 
diff --git a/src/core/file_sys/nca_metadata.cpp b/src/core/file_sys/nca_metadata.cpp
index cdfbc5aaf..479916b69 100644
--- a/src/core/file_sys/nca_metadata.cpp
+++ b/src/core/file_sys/nca_metadata.cpp
@@ -11,11 +11,11 @@
 namespace FileSys {
 
 bool operator>=(TitleType lhs, TitleType rhs) {
-    return static_cast<size_t>(lhs) >= static_cast<size_t>(rhs);
+    return static_cast<std::size_t>(lhs) >= static_cast<std::size_t>(rhs);
 }
 
 bool operator<=(TitleType lhs, TitleType rhs) {
-    return static_cast<size_t>(lhs) <= static_cast<size_t>(rhs);
+    return static_cast<std::size_t>(lhs) <= static_cast<std::size_t>(rhs);
 }
 
 CNMT::CNMT(VirtualFile file) {
diff --git a/src/core/file_sys/nca_patch.cpp b/src/core/file_sys/nca_patch.cpp
index 6fc5bd7d8..0090cc6c4 100644
--- a/src/core/file_sys/nca_patch.cpp
+++ b/src/core/file_sys/nca_patch.cpp
@@ -22,11 +22,11 @@ BKTR::BKTR(VirtualFile base_romfs_, VirtualFile bktr_romfs_, RelocationBlock rel
       base_romfs(std::move(base_romfs_)), bktr_romfs(std::move(bktr_romfs_)),
       encrypted(is_encrypted_), key(key_), base_offset(base_offset_), ivfc_offset(ivfc_offset_),
       section_ctr(section_ctr_) {
-    for (size_t i = 0; i < relocation.number_buckets - 1; ++i) {
+    for (std::size_t i = 0; i < relocation.number_buckets - 1; ++i) {
         relocation_buckets[i].entries.push_back({relocation.base_offsets[i + 1], 0, 0});
     }
 
-    for (size_t i = 0; i < subsection.number_buckets - 1; ++i) {
+    for (std::size_t i = 0; i < subsection.number_buckets - 1; ++i) {
         subsection_buckets[i].entries.push_back({subsection_buckets[i + 1].entries[0].address_patch,
                                                  {0},
                                                  subsection_buckets[i + 1].entries[0].ctr});
@@ -37,7 +37,7 @@ BKTR::BKTR(VirtualFile base_romfs_, VirtualFile bktr_romfs_, RelocationBlock rel
 
 BKTR::~BKTR() = default;
 
-size_t BKTR::Read(u8* data, size_t length, size_t offset) const {
+std::size_t BKTR::Read(u8* data, std::size_t length, std::size_t offset) const {
     // Read out of bounds.
     if (offset >= relocation.size)
         return 0;
@@ -69,14 +69,14 @@ size_t BKTR::Read(u8* data, size_t length, size_t offset) const {
     std::vector<u8> iv(16);
     auto subsection_ctr = subsection.ctr;
     auto offset_iv = section_offset + base_offset;
-    for (size_t i = 0; i < section_ctr.size(); ++i)
+    for (std::size_t i = 0; i < section_ctr.size(); ++i)
         iv[i] = section_ctr[0x8 - i - 1];
     offset_iv >>= 4;
-    for (size_t i = 0; i < sizeof(u64); ++i) {
+    for (std::size_t i = 0; i < sizeof(u64); ++i) {
         iv[0xF - i] = static_cast<u8>(offset_iv & 0xFF);
         offset_iv >>= 8;
     }
-    for (size_t i = 0; i < sizeof(u32); ++i) {
+    for (std::size_t i = 0; i < sizeof(u32); ++i) {
         iv[0x7 - i] = static_cast<u8>(subsection_ctr & 0xFF);
         subsection_ctr >>= 8;
     }
@@ -110,8 +110,8 @@ size_t BKTR::Read(u8* data, size_t length, size_t offset) const {
 }
 
 template <bool Subsection, typename BlockType, typename BucketType>
-std::pair<size_t, size_t> BKTR::SearchBucketEntry(u64 offset, BlockType block,
-                                                  BucketType buckets) const {
+std::pair<std::size_t, std::size_t> BKTR::SearchBucketEntry(u64 offset, BlockType block,
+                                                            BucketType buckets) const {
     if constexpr (Subsection) {
         const auto last_bucket = buckets[block.number_buckets - 1];
         if (offset >= last_bucket.entries[last_bucket.number_entries].address_patch)
@@ -120,18 +120,18 @@ std::pair<size_t, size_t> BKTR::SearchBucketEntry(u64 offset, BlockType block,
         ASSERT_MSG(offset <= block.size, "Offset is out of bounds in BKTR relocation block.");
     }
 
-    size_t bucket_id = std::count_if(block.base_offsets.begin() + 1,
-                                     block.base_offsets.begin() + block.number_buckets,
-                                     [&offset](u64 base_offset) { return base_offset <= offset; });
+    std::size_t bucket_id = std::count_if(
+        block.base_offsets.begin() + 1, block.base_offsets.begin() + block.number_buckets,
+        [&offset](u64 base_offset) { return base_offset <= offset; });
 
     const auto bucket = buckets[bucket_id];
 
     if (bucket.number_entries == 1)
         return {bucket_id, 0};
 
-    size_t low = 0;
-    size_t mid = 0;
-    size_t high = bucket.number_entries - 1;
+    std::size_t low = 0;
+    std::size_t mid = 0;
+    std::size_t high = bucket.number_entries - 1;
     while (low <= high) {
         mid = (low + high) / 2;
         if (bucket.entries[mid].address_patch > offset) {
@@ -179,11 +179,11 @@ std::string BKTR::GetName() const {
     return base_romfs->GetName();
 }
 
-size_t BKTR::GetSize() const {
+std::size_t BKTR::GetSize() const {
     return relocation.size;
 }
 
-bool BKTR::Resize(size_t new_size) {
+bool BKTR::Resize(std::size_t new_size) {
     return false;
 }
 
@@ -199,7 +199,7 @@ bool BKTR::IsReadable() const {
     return true;
 }
 
-size_t BKTR::Write(const u8* data, size_t length, size_t offset) {
+std::size_t BKTR::Write(const u8* data, std::size_t length, std::size_t offset) {
     return 0;
 }
 
diff --git a/src/core/file_sys/nca_patch.h b/src/core/file_sys/nca_patch.h
index 381f3504f..8e64e8378 100644
--- a/src/core/file_sys/nca_patch.h
+++ b/src/core/file_sys/nca_patch.h
@@ -98,13 +98,13 @@ public:
          Core::Crypto::Key128 key, u64 base_offset, u64 ivfc_offset, std::array<u8, 8> section_ctr);
     ~BKTR() override;
 
-    size_t Read(u8* data, size_t length, size_t offset) const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
 
     std::string GetName() const override;
 
-    size_t GetSize() const override;
+    std::size_t GetSize() const override;
 
-    bool Resize(size_t new_size) override;
+    bool Resize(std::size_t new_size) override;
 
     std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
 
@@ -112,14 +112,14 @@ public:
 
     bool IsReadable() const override;
 
-    size_t Write(const u8* data, size_t length, size_t offset) override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
 
     bool Rename(std::string_view name) override;
 
 private:
     template <bool Subsection, typename BlockType, typename BucketType>
-    std::pair<size_t, size_t> SearchBucketEntry(u64 offset, BlockType block,
-                                                BucketType buckets) const;
+    std::pair<std::size_t, std::size_t> SearchBucketEntry(u64 offset, BlockType block,
+                                                          BucketType buckets) const;
 
     RelocationEntry GetRelocationEntry(u64 offset) const;
     RelocationEntry GetNextRelocationEntry(u64 offset) const;
diff --git a/src/core/file_sys/partition_filesystem.cpp b/src/core/file_sys/partition_filesystem.cpp
index c377edc9c..f5b3b0175 100644
--- a/src/core/file_sys/partition_filesystem.cpp
+++ b/src/core/file_sys/partition_filesystem.cpp
@@ -42,21 +42,21 @@ PartitionFilesystem::PartitionFilesystem(std::shared_ptr<VfsFile> file) {
 
     is_hfs = pfs_header.magic == Common::MakeMagic('H', 'F', 'S', '0');
 
-    size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
-    size_t metadata_size =
+    std::size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
+    std::size_t metadata_size =
         sizeof(Header) + (pfs_header.num_entries * entry_size) + pfs_header.strtab_size;
 
     // Actually read in now...
     std::vector<u8> file_data = file->ReadBytes(metadata_size);
-    const size_t total_size = file_data.size();
+    const std::size_t total_size = file_data.size();
 
     if (total_size != metadata_size) {
         status = Loader::ResultStatus::ErrorIncorrectPFSFileSize;
         return;
     }
 
-    size_t entries_offset = sizeof(Header);
-    size_t strtab_offset = entries_offset + (pfs_header.num_entries * entry_size);
+    std::size_t entries_offset = sizeof(Header);
+    std::size_t strtab_offset = entries_offset + (pfs_header.num_entries * entry_size);
     content_offset = strtab_offset + pfs_header.strtab_size;
     for (u16 i = 0; i < pfs_header.num_entries; i++) {
         FSEntry entry;
diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h
index be7bc32a8..e80d2456b 100644
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -79,7 +79,7 @@ private:
 
     Header pfs_header{};
     bool is_hfs = false;
-    size_t content_offset = 0;
+    std::size_t content_offset = 0;
 
     std::vector<VirtualFile> pfs_files;
     std::vector<VirtualDir> pfs_dirs;
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 6cecab336..b37b4c68b 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -21,7 +21,7 @@ constexpr u64 SINGLE_BYTE_MODULUS = 0x100;
 std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
     std::array<u8, sizeof(u32)> bytes{};
     bytes[0] = version % SINGLE_BYTE_MODULUS;
-    for (size_t i = 1; i < bytes.size(); ++i) {
+    for (std::size_t i = 1; i < bytes.size(); ++i) {
         version /= SINGLE_BYTE_MODULUS;
         bytes[i] = version % SINGLE_BYTE_MODULUS;
     }
@@ -36,7 +36,7 @@ constexpr std::array<const char*, 1> PATCH_TYPE_NAMES{
 };
 
 std::string FormatPatchTypeName(PatchType type) {
-    return PATCH_TYPE_NAMES.at(static_cast<size_t>(type));
+    return PATCH_TYPE_NAMES.at(static_cast<std::size_t>(type));
 }
 
 PatchManager::PatchManager(u64 title_id) : title_id(title_id) {}
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index ccb685526..9d19aaa6d 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -13,7 +13,7 @@
 namespace FileSys {
 
 Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
-    size_t total_size = static_cast<size_t>(file->GetSize());
+    std::size_t total_size = static_cast<std::size_t>(file->GetSize());
     if (total_size < sizeof(Header))
         return Loader::ResultStatus::ErrorBadNPDMHeader;
 
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index 7361a67be..dad7ae10b 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -62,11 +62,11 @@ static std::string GetCNMTName(TitleType type, u64 title_id) {
         "" ///< Currently unknown 'DeltaTitle'
     };
 
-    auto index = static_cast<size_t>(type);
+    auto index = static_cast<std::size_t>(type);
     // If the index is after the jump in TitleType, subtract it out.
-    if (index >= static_cast<size_t>(TitleType::Application)) {
-        index -= static_cast<size_t>(TitleType::Application) -
-                 static_cast<size_t>(TitleType::FirmwarePackageB);
+    if (index >= static_cast<std::size_t>(TitleType::Application)) {
+        index -= static_cast<std::size_t>(TitleType::Application) -
+                 static_cast<std::size_t>(TitleType::FirmwarePackageB);
     }
     return fmt::format("{}_{:016x}.cnmt", TITLE_TYPE_NAMES[index], title_id);
 }
@@ -105,7 +105,7 @@ VirtualFile RegisteredCache::OpenFileOrDirectoryConcat(const VirtualDir& dir,
         } else {
             std::vector<VirtualFile> concat;
             // Since the files are a two-digit hex number, max is FF.
-            for (size_t i = 0; i < 0x100; ++i) {
+            for (std::size_t i = 0; i < 0x100; ++i) {
                 auto next = nca_dir->GetFile(fmt::format("{:02X}", i));
                 if (next != nullptr) {
                     concat.push_back(std::move(next));
diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp
index e490c8ace..9f6e41cdf 100644
--- a/src/core/file_sys/romfs.cpp
+++ b/src/core/file_sys/romfs.cpp
@@ -49,7 +49,7 @@ struct FileEntry {
 static_assert(sizeof(FileEntry) == 0x20, "FileEntry has incorrect size.");
 
 template <typename Entry>
-static std::pair<Entry, std::string> GetEntry(const VirtualFile& file, size_t offset) {
+static std::pair<Entry, std::string> GetEntry(const VirtualFile& file, std::size_t offset) {
     Entry entry{};
     if (file->ReadObject(&entry, offset) != sizeof(Entry))
         return {};
@@ -59,8 +59,8 @@ static std::pair<Entry, std::string> GetEntry(const VirtualFile& file, size_t of
     return {entry, string};
 }
 
-void ProcessFile(VirtualFile file, size_t file_offset, size_t data_offset, u32 this_file_offset,
-                 std::shared_ptr<VectorVfsDirectory> parent) {
+void ProcessFile(VirtualFile file, std::size_t file_offset, std::size_t data_offset,
+                 u32 this_file_offset, std::shared_ptr<VectorVfsDirectory> parent) {
     while (true) {
         auto entry = GetEntry<FileEntry>(file, file_offset + this_file_offset);
 
@@ -74,8 +74,9 @@ void ProcessFile(VirtualFile file, size_t file_offset, size_t data_offset, u32 t
     }
 }
 
-void ProcessDirectory(VirtualFile file, size_t dir_offset, size_t file_offset, size_t data_offset,
-                      u32 this_dir_offset, std::shared_ptr<VectorVfsDirectory> parent) {
+void ProcessDirectory(VirtualFile file, std::size_t dir_offset, std::size_t file_offset,
+                      std::size_t data_offset, u32 this_dir_offset,
+                      std::shared_ptr<VectorVfsDirectory> parent) {
     while (true) {
         auto entry = GetEntry<DirectoryEntry>(file, dir_offset + this_dir_offset);
         auto current = std::make_shared<VectorVfsDirectory>(
diff --git a/src/core/file_sys/vfs.cpp b/src/core/file_sys/vfs.cpp
index 146c839f4..d7b52abfd 100644
--- a/src/core/file_sys/vfs.cpp
+++ b/src/core/file_sys/vfs.cpp
@@ -167,18 +167,18 @@ std::string VfsFile::GetExtension() const {
 
 VfsDirectory::~VfsDirectory() = default;
 
-boost::optional<u8> VfsFile::ReadByte(size_t offset) const {
+boost::optional<u8> VfsFile::ReadByte(std::size_t offset) const {
     u8 out{};
-    size_t size = Read(&out, 1, offset);
+    std::size_t size = Read(&out, 1, offset);
     if (size == 1)
         return out;
 
     return boost::none;
 }
 
-std::vector<u8> VfsFile::ReadBytes(size_t size, size_t offset) const {
+std::vector<u8> VfsFile::ReadBytes(std::size_t size, std::size_t offset) const {
     std::vector<u8> out(size);
-    size_t read_size = Read(out.data(), size, offset);
+    std::size_t read_size = Read(out.data(), size, offset);
     out.resize(read_size);
     return out;
 }
@@ -187,11 +187,11 @@ std::vector<u8> VfsFile::ReadAllBytes() const {
     return ReadBytes(GetSize());
 }
 
-bool VfsFile::WriteByte(u8 data, size_t offset) {
+bool VfsFile::WriteByte(u8 data, std::size_t offset) {
     return Write(&data, 1, offset) == 1;
 }
 
-size_t VfsFile::WriteBytes(const std::vector<u8>& data, size_t offset) {
+std::size_t VfsFile::WriteBytes(const std::vector<u8>& data, std::size_t offset) {
     return Write(data.data(), data.size(), offset);
 }
 
@@ -215,7 +215,7 @@ std::shared_ptr<VfsFile> VfsDirectory::GetFileRelative(std::string_view path) co
     }
 
     auto dir = GetSubdirectory(vec[0]);
-    for (size_t component = 1; component < vec.size() - 1; ++component) {
+    for (std::size_t component = 1; component < vec.size() - 1; ++component) {
         if (dir == nullptr) {
             return nullptr;
         }
@@ -249,7 +249,7 @@ std::shared_ptr<VfsDirectory> VfsDirectory::GetDirectoryRelative(std::string_vie
     }
 
     auto dir = GetSubdirectory(vec[0]);
-    for (size_t component = 1; component < vec.size(); ++component) {
+    for (std::size_t component = 1; component < vec.size(); ++component) {
         if (dir == nullptr) {
             return nullptr;
         }
@@ -286,7 +286,7 @@ bool VfsDirectory::IsRoot() const {
     return GetParentDirectory() == nullptr;
 }
 
-size_t VfsDirectory::GetSize() const {
+std::size_t VfsDirectory::GetSize() const {
     const auto& files = GetFiles();
     const auto sum_sizes = [](const auto& range) {
         return std::accumulate(range.begin(), range.end(), 0ULL,
@@ -434,13 +434,13 @@ bool ReadOnlyVfsDirectory::Rename(std::string_view name) {
     return false;
 }
 
-bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2, size_t block_size) {
+bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2, std::size_t block_size) {
     if (file1->GetSize() != file2->GetSize())
         return false;
 
     std::vector<u8> f1_v(block_size);
     std::vector<u8> f2_v(block_size);
-    for (size_t i = 0; i < file1->GetSize(); i += block_size) {
+    for (std::size_t i = 0; i < file1->GetSize(); i += block_size) {
         auto f1_vs = file1->Read(f1_v.data(), block_size, i);
         auto f2_vs = file2->Read(f2_v.data(), block_size, i);
 
diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h
index 5142a3e86..74489b452 100644
--- a/src/core/file_sys/vfs.h
+++ b/src/core/file_sys/vfs.h
@@ -92,9 +92,9 @@ public:
     // Retrieves the extension of the file name.
     virtual std::string GetExtension() const;
     // Retrieves the size of the file.
-    virtual size_t GetSize() const = 0;
+    virtual std::size_t GetSize() const = 0;
     // Resizes the file to new_size. Returns whether or not the operation was successful.
-    virtual bool Resize(size_t new_size) = 0;
+    virtual bool Resize(std::size_t new_size) = 0;
     // Gets a pointer to the directory containing this file, returning nullptr if there is none.
     virtual std::shared_ptr<VfsDirectory> GetContainingDirectory() const = 0;
 
@@ -105,15 +105,15 @@ public:
 
     // The primary method of reading from the file. Reads length bytes into data starting at offset
     // into file. Returns number of bytes successfully read.
-    virtual size_t Read(u8* data, size_t length, size_t offset = 0) const = 0;
+    virtual std::size_t Read(u8* data, std::size_t length, std::size_t offset = 0) const = 0;
     // The primary method of writing to the file. Writes length bytes from data starting at offset
     // into file. Returns number of bytes successfully written.
-    virtual size_t Write(const u8* data, size_t length, size_t offset = 0) = 0;
+    virtual std::size_t Write(const u8* data, std::size_t length, std::size_t offset = 0) = 0;
 
     // Reads exactly one byte at the offset provided, returning boost::none on error.
-    virtual boost::optional<u8> ReadByte(size_t offset = 0) const;
+    virtual boost::optional<u8> ReadByte(std::size_t offset = 0) const;
     // Reads size bytes starting at offset in file into a vector.
-    virtual std::vector<u8> ReadBytes(size_t size, size_t offset = 0) const;
+    virtual std::vector<u8> ReadBytes(std::size_t size, std::size_t offset = 0) const;
     // Reads all the bytes from the file into a vector. Equivalent to 'file->Read(file->GetSize(),
     // 0)'
     virtual std::vector<u8> ReadAllBytes() const;
@@ -121,7 +121,7 @@ public:
     // Reads an array of type T, size number_elements starting at offset.
     // Returns the number of bytes (sizeof(T)*number_elements) read successfully.
     template <typename T>
-    size_t ReadArray(T* data, size_t number_elements, size_t offset = 0) const {
+    std::size_t ReadArray(T* data, std::size_t number_elements, std::size_t offset = 0) const {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
 
         return Read(reinterpret_cast<u8*>(data), number_elements * sizeof(T), offset);
@@ -130,7 +130,7 @@ public:
     // Reads size bytes into the memory starting at data starting at offset into the file.
     // Returns the number of bytes read successfully.
     template <typename T>
-    size_t ReadBytes(T* data, size_t size, size_t offset = 0) const {
+    std::size_t ReadBytes(T* data, std::size_t size, std::size_t offset = 0) const {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
         return Read(reinterpret_cast<u8*>(data), size, offset);
     }
@@ -138,22 +138,22 @@ public:
     // Reads one object of type T starting at offset in file.
     // Returns the number of bytes read successfully (sizeof(T)).
     template <typename T>
-    size_t ReadObject(T* data, size_t offset = 0) const {
+    std::size_t ReadObject(T* data, std::size_t offset = 0) const {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
         return Read(reinterpret_cast<u8*>(data), sizeof(T), offset);
     }
 
-    // Writes exactly one byte to offset in file and retuns whether or not the byte was written
+    // Writes exactly one byte to offset in file and returns whether or not the byte was written
     // successfully.
-    virtual bool WriteByte(u8 data, size_t offset = 0);
+    virtual bool WriteByte(u8 data, std::size_t offset = 0);
     // Writes a vector of bytes to offset in file and returns the number of bytes successfully
     // written.
-    virtual size_t WriteBytes(const std::vector<u8>& data, size_t offset = 0);
+    virtual std::size_t WriteBytes(const std::vector<u8>& data, std::size_t offset = 0);
 
     // Writes an array of type T, size number_elements to offset in file.
     // Returns the number of bytes (sizeof(T)*number_elements) written successfully.
     template <typename T>
-    size_t WriteArray(const T* data, size_t number_elements, size_t offset = 0) {
+    std::size_t WriteArray(const T* data, std::size_t number_elements, std::size_t offset = 0) {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
         return Write(data, number_elements * sizeof(T), offset);
     }
@@ -161,7 +161,7 @@ public:
     // Writes size bytes starting at memory location data to offset in file.
     // Returns the number of bytes written successfully.
     template <typename T>
-    size_t WriteBytes(const T* data, size_t size, size_t offset = 0) {
+    std::size_t WriteBytes(const T* data, std::size_t size, std::size_t offset = 0) {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
         return Write(reinterpret_cast<const u8*>(data), size, offset);
     }
@@ -169,7 +169,7 @@ public:
     // Writes one object of type T to offset in file.
     // Returns the number of bytes written successfully (sizeof(T)).
     template <typename T>
-    size_t WriteObject(const T& data, size_t offset = 0) {
+    std::size_t WriteObject(const T& data, std::size_t offset = 0) {
         static_assert(std::is_trivially_copyable_v<T>, "Data type must be trivially copyable.");
         return Write(&data, sizeof(T), offset);
     }
@@ -221,7 +221,7 @@ public:
     // Returns the name of the directory.
     virtual std::string GetName() const = 0;
     // Returns the total size of all files and subdirectories in this directory.
-    virtual size_t GetSize() const;
+    virtual std::size_t GetSize() const;
     // Returns the parent directory of this directory. Returns nullptr if this directory is root or
     // has no parent.
     virtual std::shared_ptr<VfsDirectory> GetParentDirectory() const = 0;
@@ -311,7 +311,7 @@ public:
 };
 
-// Compare the two files, byte-for-byte, in increments specificed by block_size
+// Compare the two files, byte-for-byte, in increments specified by block_size
-bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2, size_t block_size = 0x200);
+bool DeepEquals(const VirtualFile& file1, const VirtualFile& file2, std::size_t block_size = 0x200);
 
 // A method that copies the raw data between two different implementations of VirtualFile. If you
 // are using the same implementation, it is probably better to use the Copy method in the parent
diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp
index e6bf586a3..25a980cbb 100644
--- a/src/core/file_sys/vfs_concat.cpp
+++ b/src/core/file_sys/vfs_concat.cpp
@@ -20,7 +20,7 @@ VirtualFile ConcatenateFiles(std::vector<VirtualFile> files, std::string name) {
 
 ConcatenatedVfsFile::ConcatenatedVfsFile(std::vector<VirtualFile> files_, std::string name)
     : name(std::move(name)) {
-    size_t next_offset = 0;
+    std::size_t next_offset = 0;
     for (const auto& file : files_) {
         files[next_offset] = file;
         next_offset += file->GetSize();
@@ -35,13 +35,13 @@ std::string ConcatenatedVfsFile::GetName() const {
     return files.begin()->second->GetName();
 }
 
-size_t ConcatenatedVfsFile::GetSize() const {
+std::size_t ConcatenatedVfsFile::GetSize() const {
     if (files.empty())
         return 0;
     return files.rbegin()->first + files.rbegin()->second->GetSize();
 }
 
-bool ConcatenatedVfsFile::Resize(size_t new_size) {
+bool ConcatenatedVfsFile::Resize(std::size_t new_size) {
     return false;
 }
 
@@ -59,7 +59,7 @@ bool ConcatenatedVfsFile::IsReadable() const {
     return true;
 }
 
-size_t ConcatenatedVfsFile::Read(u8* data, size_t length, size_t offset) const {
+std::size_t ConcatenatedVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const {
     auto entry = files.end();
     for (auto iter = files.begin(); iter != files.end(); ++iter) {
         if (iter->first > offset) {
@@ -84,7 +84,7 @@ size_t ConcatenatedVfsFile::Read(u8* data, size_t length, size_t offset) const {
     return entry->second->Read(data, length, offset - entry->first);
 }
 
-size_t ConcatenatedVfsFile::Write(const u8* data, size_t length, size_t offset) {
+std::size_t ConcatenatedVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) {
     return 0;
 }
 
diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h
index 686d32515..31775db7e 100644
--- a/src/core/file_sys/vfs_concat.h
+++ b/src/core/file_sys/vfs_concat.h
@@ -23,13 +23,13 @@ class ConcatenatedVfsFile : public VfsFile {
 
 public:
     std::string GetName() const override;
-    size_t GetSize() const override;
-    bool Resize(size_t new_size) override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
     std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
     bool IsWritable() const override;
     bool IsReadable() const override;
-    size_t Read(u8* data, size_t length, size_t offset) const override;
-    size_t Write(const u8* data, size_t length, size_t offset) override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
     bool Rename(std::string_view name) override;
 
 private:
diff --git a/src/core/file_sys/vfs_offset.cpp b/src/core/file_sys/vfs_offset.cpp
index 847cde2f5..f5ed291ea 100644
--- a/src/core/file_sys/vfs_offset.cpp
+++ b/src/core/file_sys/vfs_offset.cpp
@@ -9,7 +9,7 @@
 
 namespace FileSys {
 
-OffsetVfsFile::OffsetVfsFile(std::shared_ptr<VfsFile> file_, size_t size_, size_t offset_,
+OffsetVfsFile::OffsetVfsFile(std::shared_ptr<VfsFile> file_, std::size_t size_, std::size_t offset_,
                              std::string name_, VirtualDir parent_)
     : file(file_), offset(offset_), size(size_), name(std::move(name_)),
       parent(parent_ == nullptr ? file->GetContainingDirectory() : std::move(parent_)) {}
@@ -18,11 +18,11 @@ std::string OffsetVfsFile::GetName() const {
     return name.empty() ? file->GetName() : name;
 }
 
-size_t OffsetVfsFile::GetSize() const {
+std::size_t OffsetVfsFile::GetSize() const {
     return size;
 }
 
-bool OffsetVfsFile::Resize(size_t new_size) {
+bool OffsetVfsFile::Resize(std::size_t new_size) {
     if (offset + new_size < file->GetSize()) {
         size = new_size;
     } else {
@@ -47,22 +47,22 @@ bool OffsetVfsFile::IsReadable() const {
     return file->IsReadable();
 }
 
-size_t OffsetVfsFile::Read(u8* data, size_t length, size_t r_offset) const {
+std::size_t OffsetVfsFile::Read(u8* data, std::size_t length, std::size_t r_offset) const {
     return file->Read(data, TrimToFit(length, r_offset), offset + r_offset);
 }
 
-size_t OffsetVfsFile::Write(const u8* data, size_t length, size_t r_offset) {
+std::size_t OffsetVfsFile::Write(const u8* data, std::size_t length, std::size_t r_offset) {
     return file->Write(data, TrimToFit(length, r_offset), offset + r_offset);
 }
 
-boost::optional<u8> OffsetVfsFile::ReadByte(size_t r_offset) const {
+boost::optional<u8> OffsetVfsFile::ReadByte(std::size_t r_offset) const {
     if (r_offset < size)
         return file->ReadByte(offset + r_offset);
 
     return boost::none;
 }
 
-std::vector<u8> OffsetVfsFile::ReadBytes(size_t r_size, size_t r_offset) const {
+std::vector<u8> OffsetVfsFile::ReadBytes(std::size_t r_size, std::size_t r_offset) const {
     return file->ReadBytes(TrimToFit(r_size, r_offset), offset + r_offset);
 }
 
@@ -70,14 +70,14 @@ std::vector<u8> OffsetVfsFile::ReadAllBytes() const {
     return file->ReadBytes(size, offset);
 }
 
-bool OffsetVfsFile::WriteByte(u8 data, size_t r_offset) {
+bool OffsetVfsFile::WriteByte(u8 data, std::size_t r_offset) {
     if (r_offset < size)
         return file->WriteByte(data, offset + r_offset);
 
     return false;
 }
 
-size_t OffsetVfsFile::WriteBytes(const std::vector<u8>& data, size_t r_offset) {
+std::size_t OffsetVfsFile::WriteBytes(const std::vector<u8>& data, std::size_t r_offset) {
     return file->Write(data.data(), TrimToFit(data.size(), r_offset), offset + r_offset);
 }
 
@@ -85,12 +85,12 @@ bool OffsetVfsFile::Rename(std::string_view name) {
     return file->Rename(name);
 }
 
-size_t OffsetVfsFile::GetOffset() const {
+std::size_t OffsetVfsFile::GetOffset() const {
     return offset;
 }
 
-size_t OffsetVfsFile::TrimToFit(size_t r_size, size_t r_offset) const {
-    return std::clamp(r_size, size_t{0}, size - r_offset);
+std::size_t OffsetVfsFile::TrimToFit(std::size_t r_size, std::size_t r_offset) const {
+    return std::clamp(r_size, std::size_t{0}, size - r_offset);
 }
 
 } // namespace FileSys
diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h
index cb92d1570..34cb180b3 100644
--- a/src/core/file_sys/vfs_offset.h
+++ b/src/core/file_sys/vfs_offset.h
@@ -17,33 +17,33 @@ namespace FileSys {
 // the size of this wrapper.
 class OffsetVfsFile : public VfsFile {
 public:
-    OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0,
+    OffsetVfsFile(std::shared_ptr<VfsFile> file, std::size_t size, std::size_t offset = 0,
                   std::string new_name = "", VirtualDir new_parent = nullptr);
 
     std::string GetName() const override;
-    size_t GetSize() const override;
-    bool Resize(size_t new_size) override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
     std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
     bool IsWritable() const override;
     bool IsReadable() const override;
-    size_t Read(u8* data, size_t length, size_t offset) const override;
-    size_t Write(const u8* data, size_t length, size_t offset) override;
-    boost::optional<u8> ReadByte(size_t offset) const override;
-    std::vector<u8> ReadBytes(size_t size, size_t offset) const override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
+    boost::optional<u8> ReadByte(std::size_t offset) const override;
+    std::vector<u8> ReadBytes(std::size_t size, std::size_t offset) const override;
     std::vector<u8> ReadAllBytes() const override;
-    bool WriteByte(u8 data, size_t offset) override;
-    size_t WriteBytes(const std::vector<u8>& data, size_t offset) override;
+    bool WriteByte(u8 data, std::size_t offset) override;
+    std::size_t WriteBytes(const std::vector<u8>& data, std::size_t offset) override;
 
     bool Rename(std::string_view name) override;
 
-    size_t GetOffset() const;
+    std::size_t GetOffset() const;
 
 private:
-    size_t TrimToFit(size_t r_size, size_t r_offset) const;
+    std::size_t TrimToFit(std::size_t r_size, std::size_t r_offset) const;
 
     std::shared_ptr<VfsFile> file;
-    size_t offset;
-    size_t size;
+    std::size_t offset;
+    std::size_t size;
     std::string name;
     VirtualDir parent;
 };
diff --git a/src/core/file_sys/vfs_real.cpp b/src/core/file_sys/vfs_real.cpp
index 89b101145..5e242e20f 100644
--- a/src/core/file_sys/vfs_real.cpp
+++ b/src/core/file_sys/vfs_real.cpp
@@ -227,11 +227,11 @@ std::string RealVfsFile::GetName() const {
     return path_components.back();
 }
 
-size_t RealVfsFile::GetSize() const {
+std::size_t RealVfsFile::GetSize() const {
     return backing->GetSize();
 }
 
-bool RealVfsFile::Resize(size_t new_size) {
+bool RealVfsFile::Resize(std::size_t new_size) {
     return backing->Resize(new_size);
 }
 
@@ -247,13 +247,13 @@ bool RealVfsFile::IsReadable() const {
     return (perms & Mode::ReadWrite) != 0;
 }
 
-size_t RealVfsFile::Read(u8* data, size_t length, size_t offset) const {
+std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const {
     if (!backing->Seek(offset, SEEK_SET))
         return 0;
     return backing->ReadBytes(data, length);
 }
 
-size_t RealVfsFile::Write(const u8* data, size_t length, size_t offset) {
+std::size_t RealVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) {
     if (!backing->Seek(offset, SEEK_SET))
         return 0;
     return backing->WriteBytes(data, length);
diff --git a/src/core/file_sys/vfs_real.h b/src/core/file_sys/vfs_real.h
index 7db86691f..681c43e82 100644
--- a/src/core/file_sys/vfs_real.h
+++ b/src/core/file_sys/vfs_real.h
@@ -48,13 +48,13 @@ public:
     ~RealVfsFile() override;
 
     std::string GetName() const override;
-    size_t GetSize() const override;
-    bool Resize(size_t new_size) override;
+    std::size_t GetSize() const override;
+    bool Resize(std::size_t new_size) override;
     std::shared_ptr<VfsDirectory> GetContainingDirectory() const override;
     bool IsWritable() const override;
     bool IsReadable() const override;
-    size_t Read(u8* data, size_t length, size_t offset) const override;
-    size_t Write(const u8* data, size_t length, size_t offset) override;
+    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
+    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
     bool Rename(std::string_view name) override;
 
 private:
diff --git a/src/core/file_sys/xts_archive.cpp b/src/core/file_sys/xts_archive.cpp
index 4dbc25c55..0173f71c1 100644
--- a/src/core/file_sys/xts_archive.cpp
+++ b/src/core/file_sys/xts_archive.cpp
@@ -25,8 +25,8 @@ namespace FileSys {
 constexpr u64 NAX_HEADER_PADDING_SIZE = 0x4000;
 
 template <typename SourceData, typename SourceKey, typename Destination>
-static bool CalculateHMAC256(Destination* out, const SourceKey* key, size_t key_length,
-                             const SourceData* data, size_t data_length) {
+static bool CalculateHMAC256(Destination* out, const SourceKey* key, std::size_t key_length,
+                             const SourceData* data, std::size_t data_length) {
     mbedtls_md_context_t context;
     mbedtls_md_init(&context);
 
@@ -91,7 +91,7 @@ Loader::ResultStatus NAX::Parse(std::string_view path) {
 
     const auto enc_keys = header->key_area;
 
-    size_t i = 0;
+    std::size_t i = 0;
     for (; i < sd_keys.size(); ++i) {
         std::array<Core::Crypto::Key128, 2> nax_keys{};
         if (!CalculateHMAC256(nax_keys.data(), sd_keys[i].data(), 0x10, std::string(path).c_str(),
@@ -99,7 +99,7 @@ Loader::ResultStatus NAX::Parse(std::string_view path) {
             return Loader::ResultStatus::ErrorNAXKeyHMACFailed;
         }
 
-        for (size_t j = 0; j < nax_keys.size(); ++j) {
+        for (std::size_t j = 0; j < nax_keys.size(); ++j) {
             Core::Crypto::AESCipher<Core::Crypto::Key128> cipher(nax_keys[j],
                                                                  Core::Crypto::Mode::ECB);
             cipher.Transcode(enc_keys[j].data(), 0x10, header->key_area[j].data(),
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 332e5c3d0..cfaf20a88 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -292,7 +292,7 @@ static u8 NibbleToHex(u8 n) {
  * @param src Pointer to array of output hex string characters.
  * @param len Length of src array.
  */
-static u32 HexToInt(const u8* src, size_t len) {
+static u32 HexToInt(const u8* src, std::size_t len) {
     u32 output = 0;
     while (len-- > 0) {
         output = (output << 4) | HexCharToValue(src[0]);
@@ -307,7 +307,7 @@ static u32 HexToInt(const u8* src, size_t len) {
  * @param src Pointer to array of output hex string characters.
  * @param len Length of src array.
  */
-static u64 HexToLong(const u8* src, size_t len) {
+static u64 HexToLong(const u8* src, std::size_t len) {
     u64 output = 0;
     while (len-- > 0) {
         output = (output << 4) | HexCharToValue(src[0]);
@@ -323,7 +323,7 @@ static u64 HexToLong(const u8* src, size_t len) {
  * @param src Pointer to array of u8 bytes.
  * @param len Length of src array.
  */
-static void MemToGdbHex(u8* dest, const u8* src, size_t len) {
+static void MemToGdbHex(u8* dest, const u8* src, std::size_t len) {
     while (len-- > 0) {
         u8 tmp = *src++;
         *dest++ = NibbleToHex(tmp >> 4);
@@ -338,7 +338,7 @@ static void MemToGdbHex(u8* dest, const u8* src, size_t len) {
  * @param src Pointer to array of output hex string characters.
  * @param len Length of src array.
  */
-static void GdbHexToMem(u8* dest, const u8* src, size_t len) {
+static void GdbHexToMem(u8* dest, const u8* src, std::size_t len) {
     while (len-- > 0) {
         *dest++ = (HexCharToValue(src[0]) << 4) | HexCharToValue(src[1]);
         src += 2;
@@ -406,7 +406,7 @@ static u64 GdbHexToLong(const u8* src) {
 /// Read a byte from the gdb client.
 static u8 ReadByte() {
     u8 c;
-    size_t received_size = recv(gdbserver_socket, reinterpret_cast<char*>(&c), 1, MSG_WAITALL);
+    std::size_t received_size = recv(gdbserver_socket, reinterpret_cast<char*>(&c), 1, MSG_WAITALL);
     if (received_size != 1) {
         LOG_ERROR(Debug_GDBStub, "recv failed: {}", received_size);
         Shutdown();
@@ -416,7 +416,7 @@ static u8 ReadByte() {
 }
 
 /// Calculate the checksum of the current command buffer.
-static u8 CalculateChecksum(const u8* buffer, size_t length) {
+static u8 CalculateChecksum(const u8* buffer, std::size_t length) {
     return static_cast<u8>(std::accumulate(buffer, buffer + length, 0, std::plus<u8>()));
 }
 
@@ -518,7 +518,7 @@ bool CheckBreakpoint(VAddr addr, BreakpointType type) {
  * @param packet Packet to be sent to client.
  */
 static void SendPacket(const char packet) {
-    size_t sent_size = send(gdbserver_socket, &packet, 1, 0);
+    std::size_t sent_size = send(gdbserver_socket, &packet, 1, 0);
     if (sent_size != 1) {
         LOG_ERROR(Debug_GDBStub, "send failed");
     }
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index eaa5395ac..419f45896 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -12,7 +12,7 @@
 namespace IPC {
 
 /// Size of the command buffer area, in 32-bit words.
-constexpr size_t COMMAND_BUFFER_LENGTH = 0x100 / sizeof(u32);
+constexpr std::size_t COMMAND_BUFFER_LENGTH = 0x100 / sizeof(u32);
 
 // These errors are commonly returned by invalid IPC translations, so alias them here for
 // convenience.
@@ -153,7 +153,7 @@ struct DataPayloadHeader {
     u32_le magic;
     INSERT_PADDING_WORDS(1);
 };
-static_assert(sizeof(DataPayloadHeader) == 8, "DataPayloadRequest size is incorrect");
+static_assert(sizeof(DataPayloadHeader) == 8, "DataPayloadHeader size is incorrect");
 
 struct DomainMessageHeader {
     enum class CommandType : u32_le {
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 0f3ffdb60..7545ecf2a 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -152,8 +152,8 @@ public:
     }
 
     void ValidateHeader() {
-        const size_t num_domain_objects = context->NumDomainObjects();
-        const size_t num_move_objects = context->NumMoveObjects();
+        const std::size_t num_domain_objects = context->NumDomainObjects();
+        const std::size_t num_move_objects = context->NumMoveObjects();
         ASSERT_MSG(!num_domain_objects || !num_move_objects,
                    "cannot move normal handles and domain objects");
         ASSERT_MSG((index - datapayload_index) == normal_params_size,
@@ -329,10 +329,10 @@ public:
     T PopRaw();
 
     template <typename T>
-    Kernel::SharedPtr<T> GetMoveObject(size_t index);
+    Kernel::SharedPtr<T> GetMoveObject(std::size_t index);
 
     template <typename T>
-    Kernel::SharedPtr<T> GetCopyObject(size_t index);
+    Kernel::SharedPtr<T> GetCopyObject(std::size_t index);
 
     template <class T>
     std::shared_ptr<T> PopIpcInterface() {
@@ -406,12 +406,12 @@ void RequestParser::Pop(First& first_value, Other&... other_values) {
 }
 
 template <typename T>
-Kernel::SharedPtr<T> RequestParser::GetMoveObject(size_t index) {
+Kernel::SharedPtr<T> RequestParser::GetMoveObject(std::size_t index) {
     return context->GetMoveObject<T>(index);
 }
 
 template <typename T>
-Kernel::SharedPtr<T> RequestParser::GetCopyObject(size_t index) {
+Kernel::SharedPtr<T> RequestParser::GetCopyObject(std::size_t index) {
     return context->GetCopyObject<T>(index);
 }
 
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 6657accd5..93577591f 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -35,16 +35,17 @@ static ResultCode WaitForAddress(VAddr address, s64 timeout) {
 
 // Gets the threads waiting on an address.
 static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
-    const auto RetrieveWaitingThreads =
-        [](size_t core_index, std::vector<SharedPtr<Thread>>& waiting_threads, VAddr arb_addr) {
-            const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
-            auto& thread_list = scheduler->GetThreadList();
-
-            for (auto& thread : thread_list) {
-                if (thread->arb_wait_address == arb_addr)
-                    waiting_threads.push_back(thread);
-            }
-        };
+    const auto RetrieveWaitingThreads = [](std::size_t core_index,
+                                           std::vector<SharedPtr<Thread>>& waiting_threads,
+                                           VAddr arb_addr) {
+        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+        auto& thread_list = scheduler->GetThreadList();
+
+        for (auto& thread : thread_list) {
+            if (thread->arb_wait_address == arb_addr)
+                waiting_threads.push_back(thread);
+        }
+    };
 
     // Retrieve all threads that are waiting for this address.
     std::vector<SharedPtr<Thread>> threads;
@@ -66,12 +67,12 @@ static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address)
 static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
     // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
     // them all.
-    size_t last = waiting_threads.size();
+    std::size_t last = waiting_threads.size();
     if (num_to_wake > 0)
         last = num_to_wake;
 
     // Signal the waiting threads.
-    for (size_t i = 0; i < last; i++) {
+    for (std::size_t i = 0; i < last; i++) {
         ASSERT(waiting_threads[i]->status == ThreadStatus::WaitArb);
         waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
         waiting_threads[i]->arb_wait_address = 0;
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index 4054d5db6..8c2be2681 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -17,10 +17,12 @@ enum {
 
     // Confirmed Switch OS error codes
     MaxConnectionsReached = 7,
+    InvalidSize = 101,
     InvalidAddress = 102,
     HandleTableFull = 105,
     InvalidMemoryState = 106,
     InvalidMemoryPermissions = 108,
+    InvalidThreadPriority = 112,
     InvalidProcessorId = 113,
     InvalidHandle = 114,
     InvalidCombination = 116,
@@ -28,6 +30,7 @@ enum {
     SynchronizationCanceled = 118,
     TooLarge = 119,
     InvalidEnumValue = 120,
+    NoSuchEntry = 121,
     InvalidState = 125,
     ResourceLimitExceeded = 132,
 };
@@ -36,7 +39,7 @@ enum {
-// WARNING: The kernel is quite inconsistent in it's usage of errors code. Make sure to always
+// WARNING: The kernel is quite inconsistent in its usage of error codes. Make sure to always
 // double check that the code matches before re-using the constant.
 
-// TODO(bunnei): Replace these with correct errors for Switch OS
+// TODO(bunnei): Replace -1 with correct errors for Switch OS
 constexpr ResultCode ERR_HANDLE_TABLE_FULL(ErrorModule::Kernel, ErrCodes::HandleTableFull);
 constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE(-1);
 constexpr ResultCode ERR_PORT_NAME_TOO_LONG(ErrorModule::Kernel, ErrCodes::TooLarge);
@@ -53,15 +56,17 @@ constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::In
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel,
                                                     ErrCodes::InvalidMemoryPermissions);
 constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
+constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId);
+constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize);
 constexpr ResultCode ERR_INVALID_STATE(ErrorModule::Kernel, ErrCodes::InvalidState);
+constexpr ResultCode ERR_INVALID_THREAD_PRIORITY(ErrorModule::Kernel,
+                                                 ErrCodes::InvalidThreadPriority);
 constexpr ResultCode ERR_INVALID_POINTER(-1);
 constexpr ResultCode ERR_INVALID_OBJECT_ADDR(-1);
 constexpr ResultCode ERR_NOT_AUTHORIZED(-1);
 /// Alternate code returned instead of ERR_INVALID_HANDLE in some code paths.
 constexpr ResultCode ERR_INVALID_HANDLE_OS(-1);
-constexpr ResultCode ERR_NOT_FOUND(-1);
-constexpr ResultCode ERR_OUT_OF_RANGE(-1);
-constexpr ResultCode ERR_OUT_OF_RANGE_KERNEL(-1);
+constexpr ResultCode ERR_NOT_FOUND(ErrorModule::Kernel, ErrCodes::NoSuchEntry);
 constexpr ResultCode RESULT_TIMEOUT(ErrorModule::Kernel, ErrCodes::Timeout);
 /// Returned when Accept() is called on a port with no sessions to be accepted.
 constexpr ResultCode ERR_NO_PENDING_SESSIONS(-1);
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 3a079b9a9..5ee5c05e3 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -65,7 +65,7 @@ ResultCode HandleTable::Close(Handle handle) {
 }
 
 bool HandleTable::IsValid(Handle handle) const {
-    size_t slot = GetSlot(handle);
+    std::size_t slot = GetSlot(handle);
     u16 generation = GetGeneration(handle);
 
     return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index cac928adb..9e2f33e8a 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -93,7 +93,7 @@ private:
      * This is the maximum limit of handles allowed per process in CTR-OS. It can be further
      * reduced by ExHeader values, but this is not emulated here.
      */
-    static const size_t MAX_COUNT = 4096;
+    static const std::size_t MAX_COUNT = 4096;
 
     static u16 GetSlot(Handle handle) {
         return handle >> 15;
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 7264be906..72fb9d250 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -42,9 +42,9 @@ SharedPtr<Event> HLERequestContext::SleepClientThread(SharedPtr<Thread> thread,
                                                       Kernel::SharedPtr<Kernel::Event> event) {
 
     // Put the client thread to sleep until the wait event is signaled or the timeout expires.
-    thread->wakeup_callback =
-        [context = *this, callback](ThreadWakeupReason reason, SharedPtr<Thread> thread,
-                                    SharedPtr<WaitObject> object, size_t index) mutable -> bool {
+    thread->wakeup_callback = [context = *this, callback](
+                                  ThreadWakeupReason reason, SharedPtr<Thread> thread,
+                                  SharedPtr<WaitObject> object, std::size_t index) mutable -> bool {
         ASSERT(thread->status == ThreadStatus::WaitHLEEvent);
         callback(thread, context, reason);
         context.WriteToOutgoingCommandBuffer(*thread);
@@ -199,8 +199,8 @@ ResultCode HLERequestContext::PopulateFromIncomingCommandBuffer(u32_le* src_cmdb
     }
 
     // The data_size already includes the payload header, the padding and the domain header.
-    size_t size = data_payload_offset + command_header->data_size -
-                  sizeof(IPC::DataPayloadHeader) / sizeof(u32) - 4;
+    std::size_t size = data_payload_offset + command_header->data_size -
+                       sizeof(IPC::DataPayloadHeader) / sizeof(u32) - 4;
     if (domain_message_header)
         size -= sizeof(IPC::DomainMessageHeader) / sizeof(u32);
     std::copy_n(src_cmdbuf, size, cmd_buf.begin());
@@ -217,8 +217,8 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(const Thread& thread)
     ParseCommandBuffer(cmd_buf.data(), false);
 
     // The data_size already includes the payload header, the padding and the domain header.
-    size_t size = data_payload_offset + command_header->data_size -
-                  sizeof(IPC::DataPayloadHeader) / sizeof(u32) - 4;
+    std::size_t size = data_payload_offset + command_header->data_size -
+                       sizeof(IPC::DataPayloadHeader) / sizeof(u32) - 4;
     if (domain_message_header)
         size -= sizeof(IPC::DomainMessageHeader) / sizeof(u32);
 
@@ -229,7 +229,7 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(const Thread& thread)
                    "Handle descriptor bit set but no handles to translate");
         // We write the translated handles at a specific offset in the command buffer, this space
         // was already reserved when writing the header.
-        size_t current_offset =
+        std::size_t current_offset =
             (sizeof(IPC::CommandHeader) + sizeof(IPC::HandleDescriptorHeader)) / sizeof(u32);
         ASSERT_MSG(!handle_descriptor_header->send_current_pid, "Sending PID is not implemented");
 
@@ -258,7 +258,7 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(const Thread& thread)
         ASSERT(domain_message_header->num_objects == domain_objects.size());
         // Write the domain objects to the command buffer, these go after the raw untranslated data.
         // TODO(Subv): This completely ignores C buffers.
-        size_t domain_offset = size - domain_message_header->num_objects;
+        std::size_t domain_offset = size - domain_message_header->num_objects;
         auto& request_handlers = server_session->domain_request_handlers;
 
         for (auto& object : domain_objects) {
@@ -291,14 +291,15 @@ std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
     return buffer;
 }
 
-size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffer_index) const {
+std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
+                                           int buffer_index) const {
     if (size == 0) {
         LOG_WARNING(Core, "skip empty buffer write");
         return 0;
     }
 
     const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
-    const size_t buffer_size{GetWriteBufferSize(buffer_index)};
+    const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
     if (size > buffer_size) {
         LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
                      buffer_size);
@@ -314,13 +315,13 @@ size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffe
     return size;
 }
 
-size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
+std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
     const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
     return is_buffer_a ? BufferDescriptorA()[buffer_index].Size()
                        : BufferDescriptorX()[buffer_index].Size();
 }
 
-size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const {
+std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const {
     const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
     return is_buffer_b ? BufferDescriptorB()[buffer_index].Size()
                        : BufferDescriptorC()[buffer_index].Size();
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index f0d07f1b6..894479ee0 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -170,7 +170,7 @@ public:
     std::vector<u8> ReadBuffer(int buffer_index = 0) const;
 
     /// Helper function to write a buffer using the appropriate buffer descriptor
-    size_t WriteBuffer(const void* buffer, size_t size, int buffer_index = 0) const;
+    std::size_t WriteBuffer(const void* buffer, std::size_t size, int buffer_index = 0) const;
 
     /* Helper function to write a buffer using the appropriate buffer descriptor
      *
@@ -182,7 +182,7 @@ public:
      */
     template <typename ContiguousContainer,
               typename = std::enable_if_t<!std::is_pointer_v<ContiguousContainer>>>
-    size_t WriteBuffer(const ContiguousContainer& container, int buffer_index = 0) const {
+    std::size_t WriteBuffer(const ContiguousContainer& container, int buffer_index = 0) const {
         using ContiguousType = typename ContiguousContainer::value_type;
 
         static_assert(std::is_trivially_copyable_v<ContiguousType>,
@@ -193,19 +193,19 @@ public:
     }
 
     /// Helper function to get the size of the input buffer
-    size_t GetReadBufferSize(int buffer_index = 0) const;
+    std::size_t GetReadBufferSize(int buffer_index = 0) const;
 
     /// Helper function to get the size of the output buffer
-    size_t GetWriteBufferSize(int buffer_index = 0) const;
+    std::size_t GetWriteBufferSize(int buffer_index = 0) const;
 
     template <typename T>
-    SharedPtr<T> GetCopyObject(size_t index) {
+    SharedPtr<T> GetCopyObject(std::size_t index) {
         ASSERT(index < copy_objects.size());
         return DynamicObjectCast<T>(copy_objects[index]);
     }
 
     template <typename T>
-    SharedPtr<T> GetMoveObject(size_t index) {
+    SharedPtr<T> GetMoveObject(std::size_t index) {
         ASSERT(index < move_objects.size());
         return DynamicObjectCast<T>(move_objects[index]);
     }
@@ -223,7 +223,7 @@ public:
     }
 
     template <typename T>
-    std::shared_ptr<T> GetDomainRequestHandler(size_t index) const {
+    std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
         return std::static_pointer_cast<T>(domain_request_handlers[index]);
     }
 
@@ -240,15 +240,15 @@ public:
         domain_objects.clear();
     }
 
-    size_t NumMoveObjects() const {
+    std::size_t NumMoveObjects() const {
         return move_objects.size();
     }
 
-    size_t NumCopyObjects() const {
+    std::size_t NumCopyObjects() const {
         return copy_objects.size();
     }
 
-    size_t NumDomainObjects() const {
+    std::size_t NumDomainObjects() const {
         return domain_objects.size();
     }
 
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index b025e323f..7a272d031 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -40,8 +40,8 @@ SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
     return process;
 }
 
-void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
-    for (size_t i = 0; i < len; ++i) {
+void Process::ParseKernelCaps(const u32* kernel_caps, std::size_t len) {
+    for (std::size_t i = 0; i < len; ++i) {
         u32 descriptor = kernel_caps[i];
         u32 type = descriptor >> 20;
 
@@ -211,7 +211,7 @@ ResultCode Process::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
                "Shared memory exceeds bounds of mapped block");
 
     const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block;
-    size_t backing_block_offset = vma->second.offset + vma_offset;
+    std::size_t backing_block_offset = vma->second.offset + vma_offset;
 
     CASCADE_RESULT(auto new_vma,
                    vm_manager.MapMemoryBlock(dst_addr, backing_block, backing_block_offset, size,
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 1587d40c1..81538f70c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -59,7 +59,7 @@ class ResourceLimit;
 
 struct CodeSet final : public Object {
     struct Segment {
-        size_t offset = 0;
+        std::size_t offset = 0;
         VAddr addr = 0;
         u32 size = 0;
     };
@@ -164,7 +164,7 @@ public:
      * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
      * to this process.
      */
-    void ParseKernelCaps(const u32* kernel_caps, size_t len);
+    void ParseKernelCaps(const u32* kernel_caps, std::size_t len);
 
     /**
      * Applies address space changes and launches the process main thread.
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index 2c729afe3..2c06bb7ce 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -119,7 +119,7 @@ public:
     /// Backing memory for this shared memory block.
     std::shared_ptr<std::vector<u8>> backing_block;
     /// Offset into the backing block for this shared memory.
-    size_t backing_block_offset;
+    std::size_t backing_block_offset;
     /// Size of the memory block. Page-aligned.
     u64 size;
     /// Permission restrictions applied to the process which created the block.
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 1c9373ed8..c5c1697ee 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -35,10 +35,21 @@
 #include "core/hle/service/service.h"
 
 namespace Kernel {
+namespace {
+constexpr bool Is4KBAligned(VAddr address) {
+    return (address & 0xFFF) == 0;
+}
+} // Anonymous namespace
 
 /// Set the process heap to a given Size. It can both extend and shrink the heap.
 static ResultCode SetHeapSize(VAddr* heap_addr, u64 heap_size) {
     LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
+
+    // Size must be a multiple of 0x200000 (2MB) and be less than 8GB.
+    if ((heap_size & 0xFFFFFFFE001FFFFF) != 0) {
+        return ERR_INVALID_SIZE;
+    }
+
     auto& process = *Core::CurrentProcess();
     CASCADE_RESULT(*heap_addr,
                    process.HeapAllocate(Memory::HEAP_VADDR, heap_size, VMAPermission::ReadWrite));
@@ -56,6 +67,15 @@ static ResultCode SetMemoryAttribute(VAddr addr, u64 size, u32 state0, u32 state
 static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
+
+    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
     return Core::CurrentProcess()->MirrorMemory(dst_addr, src_addr, size);
 }
 
@@ -63,6 +83,15 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
 static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
+
+    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
     return Core::CurrentProcess()->UnmapMemory(dst_addr, src_addr, size);
 }
 
@@ -146,7 +175,7 @@ static ResultCode GetProcessId(u32* process_id, Handle process_handle) {
 
 /// Default thread wakeup callback for WaitSynchronization
 static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thread> thread,
-                                        SharedPtr<WaitObject> object, size_t index) {
+                                        SharedPtr<WaitObject> object, std::size_t index) {
     ASSERT(thread->status == ThreadStatus::WaitSynchAny);
 
     if (reason == ThreadWakeupReason::Timeout) {
@@ -273,7 +302,11 @@ static void Break(u64 reason, u64 info1, u64 info2) {
 }
 
 /// Used to output a message on a debug hardware unit - does nothing on a retail unit
-static void OutputDebugString(VAddr address, s32 len) {
+static void OutputDebugString(VAddr address, u64 len) {
+    if (len == 0) {
+        return;
+    }
+
     std::string str(len, '\0');
     Memory::ReadBlock(address, str.data(), str.size());
     LOG_DEBUG(Debug_Emulated, "{}", str);
@@ -378,7 +411,7 @@ static ResultCode GetThreadPriority(u32* priority, Handle handle) {
 /// Sets the priority for the specified thread
 static ResultCode SetThreadPriority(Handle handle, u32 priority) {
     if (priority > THREADPRIO_LOWEST) {
-        return ERR_OUT_OF_RANGE;
+        return ERR_INVALID_THREAD_PRIORITY;
     }
 
     auto& kernel = Core::System::GetInstance().Kernel();
@@ -411,35 +444,43 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
               "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
               shared_memory_handle, addr, size, permissions);
 
+    if (!Is4KBAligned(addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
+    const auto permissions_type = static_cast<MemoryPermission>(permissions);
+    if (permissions_type != MemoryPermission::Read &&
+        permissions_type != MemoryPermission::ReadWrite) {
+        LOG_ERROR(Kernel_SVC, "Invalid permissions=0x{:08X}", permissions);
+        return ERR_INVALID_MEMORY_PERMISSIONS;
+    }
+
     auto& kernel = Core::System::GetInstance().Kernel();
     auto shared_memory = kernel.HandleTable().Get<SharedMemory>(shared_memory_handle);
     if (!shared_memory) {
         return ERR_INVALID_HANDLE;
     }
 
-    MemoryPermission permissions_type = static_cast<MemoryPermission>(permissions);
-    switch (permissions_type) {
-    case MemoryPermission::Read:
-    case MemoryPermission::Write:
-    case MemoryPermission::ReadWrite:
-    case MemoryPermission::Execute:
-    case MemoryPermission::ReadExecute:
-    case MemoryPermission::WriteExecute:
-    case MemoryPermission::ReadWriteExecute:
-    case MemoryPermission::DontCare:
-        return shared_memory->Map(Core::CurrentProcess().get(), addr, permissions_type,
-                                  MemoryPermission::DontCare);
-    default:
-        LOG_ERROR(Kernel_SVC, "unknown permissions=0x{:08X}", permissions);
-    }
-
-    return RESULT_SUCCESS;
+    return shared_memory->Map(Core::CurrentProcess().get(), addr, permissions_type,
+                              MemoryPermission::DontCare);
 }
 
 static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size) {
     LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}",
                 shared_memory_handle, addr, size);
 
+    if (!Is4KBAligned(addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
     auto& kernel = Core::System::GetInstance().Kernel();
     auto shared_memory = kernel.HandleTable().Get<SharedMemory>(shared_memory_handle);
 
@@ -520,10 +561,10 @@ static void ExitProcess() {
 /// Creates a new thread
 static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, VAddr stack_top,
                                u32 priority, s32 processor_id) {
-    std::string name = fmt::format("unknown-{:X}", entry_point);
+    std::string name = fmt::format("thread-{:X}", entry_point);
 
     if (priority > THREADPRIO_LOWEST) {
-        return ERR_OUT_OF_RANGE;
+        return ERR_INVALID_THREAD_PRIORITY;
     }
 
     SharedPtr<ResourceLimit>& resource_limit = Core::CurrentProcess()->resource_limit;
@@ -544,8 +585,8 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
     case THREADPROCESSORID_3:
         break;
     default:
-        ASSERT_MSG(false, "Unsupported thread processor ID: {}", processor_id);
-        break;
+        LOG_ERROR(Kernel_SVC, "Invalid thread processor ID: {}", processor_id);
+        return ERR_INVALID_PROCESSOR_ID;
     }
 
     auto& kernel = Core::System::GetInstance().Kernel();
@@ -643,16 +684,17 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
     LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
               condition_variable_addr, target);
 
-    auto RetrieveWaitingThreads =
-        [](size_t core_index, std::vector<SharedPtr<Thread>>& waiting_threads, VAddr condvar_addr) {
-            const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
-            auto& thread_list = scheduler->GetThreadList();
+    auto RetrieveWaitingThreads = [](std::size_t core_index,
+                                     std::vector<SharedPtr<Thread>>& waiting_threads,
+                                     VAddr condvar_addr) {
+        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+        auto& thread_list = scheduler->GetThreadList();
 
-            for (auto& thread : thread_list) {
-                if (thread->condvar_wait_address == condvar_addr)
-                    waiting_threads.push_back(thread);
-            }
-        };
+        for (auto& thread : thread_list) {
+            if (thread->condvar_wait_address == condvar_addr)
+                waiting_threads.push_back(thread);
+        }
+    };
 
     // Retrieve a list of all threads that are waiting for this condition variable.
     std::vector<SharedPtr<Thread>> waiting_threads;
@@ -668,7 +710,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
     // Only process up to 'target' threads, unless 'target' is -1, in which case process
     // them all.
-    size_t last = waiting_threads.size();
+    std::size_t last = waiting_threads.size();
     if (target != -1)
         last = target;
 
@@ -676,12 +718,12 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
     if (last > waiting_threads.size())
         return RESULT_SUCCESS;
 
-    for (size_t index = 0; index < last; ++index) {
+    for (std::size_t index = 0; index < last; ++index) {
         auto& thread = waiting_threads[index];
 
         ASSERT(thread->condvar_wait_address == condition_variable_addr);
 
-        size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
+        std::size_t current_core = Core::System::GetInstance().CurrentCoreIndex();
 
         auto& monitor = Core::System::GetInstance().Monitor();
 
@@ -894,12 +936,28 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
     LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size,
               local_permissions, remote_permissions);
 
+    // Size must be a multiple of 4KB and be less than or equal to
+    // approx. 8 GB (actually (1GB - 512B) * 8)
+    if (size == 0 || (size & 0xFFFFFFFE00000FFF) != 0) {
+        return ERR_INVALID_SIZE;
+    }
+
+    const auto local_perms = static_cast<MemoryPermission>(local_permissions);
+    if (local_perms != MemoryPermission::Read && local_perms != MemoryPermission::ReadWrite) {
+        return ERR_INVALID_MEMORY_PERMISSIONS;
+    }
+
+    const auto remote_perms = static_cast<MemoryPermission>(remote_permissions);
+    if (remote_perms != MemoryPermission::Read && remote_perms != MemoryPermission::ReadWrite &&
+        remote_perms != MemoryPermission::DontCare) {
+        return ERR_INVALID_MEMORY_PERMISSIONS;
+    }
+
     auto& kernel = Core::System::GetInstance().Kernel();
     auto& handle_table = kernel.HandleTable();
     auto shared_mem_handle =
         SharedMemory::Create(kernel, handle_table.Get<Process>(KernelHandle::CurrentProcess), size,
-                             static_cast<MemoryPermission>(local_permissions),
-                             static_cast<MemoryPermission>(remote_permissions));
+                             local_perms, remote_perms);
 
     CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
     return RESULT_SUCCESS;
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 79c3fe31b..1eda5f879 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -222,9 +222,9 @@ void SvcWrap() {
     func((s64)PARAM(0));
 }
 
-template <void func(u64, s32 len)>
+template <void func(u64, u64 len)>
 void SvcWrap() {
-    func(PARAM(0), (s32)(PARAM(1) & 0xFFFFFFFF));
+    func(PARAM(0), PARAM(1));
 }
 
 template <void func(u64, u64, u64)>
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 3d10d9af2..89cd5f401 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -227,12 +227,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
     // Check if priority is in ranged. Lowest priority -> highest priority id.
     if (priority > THREADPRIO_LOWEST) {
         LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority);
-        return ERR_OUT_OF_RANGE;
+        return ERR_INVALID_THREAD_PRIORITY;
     }
 
     if (processor_id > THREADPROCESSORID_MAX) {
         LOG_ERROR(Kernel_SVC, "Invalid processor id: {}", processor_id);
-        return ERR_OUT_OF_RANGE_KERNEL;
+        return ERR_INVALID_PROCESSOR_ID;
     }
 
     // TODO(yuriks): Other checks, returning 0xD9001BEA
@@ -275,7 +275,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
         available_slot = 0; // Use the first slot in the new page
 
         // Allocate some memory from the end of the linear heap for this region.
-        const size_t offset = thread->tls_memory->size();
+        const std::size_t offset = thread->tls_memory->size();
         thread->tls_memory->insert(thread->tls_memory->end(), Memory::PAGE_SIZE, 0);
 
         auto& vm_manager = owner_process->vm_manager;
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 20f50458b..df4748942 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -15,6 +15,12 @@
 #include "core/hle/kernel/wait_object.h"
 #include "core/hle/result.h"
 
+namespace Kernel {
+
+class KernelCore;
+class Process;
+class Scheduler;
+
 enum ThreadPriority : u32 {
     THREADPRIO_HIGHEST = 0,       ///< Highest thread priority
     THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
@@ -54,12 +60,6 @@ enum class ThreadWakeupReason {
     Timeout // The thread was woken up due to a wait timeout.
 };
 
-namespace Kernel {
-
-class KernelCore;
-class Process;
-class Scheduler;
-
 class Thread final : public WaitObject {
 public:
     /**
@@ -254,7 +254,7 @@ public:
     Handle callback_handle;
 
     using WakeupCallback = bool(ThreadWakeupReason reason, SharedPtr<Thread> thread,
-                                SharedPtr<WaitObject> object, size_t index);
+                                SharedPtr<WaitObject> object, std::size_t index);
     // Callback that will be invoked when the thread is resumed from a waiting state. If the thread
     // was waiting via WaitSynchronizationN then the object will be the last object that became
     // available. In case of a timeout, the object will be nullptr.
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 479cacb62..608cbd57b 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -86,7 +86,7 @@ VMManager::VMAHandle VMManager::FindVMA(VAddr target) const {
 
 ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
                                                           std::shared_ptr<std::vector<u8>> block,
-                                                          size_t offset, u64 size,
+                                                          std::size_t offset, u64 size,
                                                           MemoryState state) {
     ASSERT(block != nullptr);
     ASSERT(offset + size <= block->size());
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 98bd04bea..de75036c0 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -81,7 +81,7 @@ struct VirtualMemoryArea {
     /// Memory block backing this VMA.
     std::shared_ptr<std::vector<u8>> backing_block = nullptr;
     /// Offset into the backing_memory the mapping starts from.
-    size_t offset = 0;
+    std::size_t offset = 0;
 
     // Settings for type = BackingMemory
     /// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed.
@@ -147,7 +147,7 @@ public:
      * @param state MemoryState tag to attach to the VMA.
      */
     ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
-                                        size_t offset, u64 size, MemoryState state);
+                                        std::size_t offset, u64 size, MemoryState state);
 
     /**
      * Maps an unmanaged host memory pointer at a given address.
diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp
index eef00b729..b190ceb98 100644
--- a/src/core/hle/kernel/wait_object.cpp
+++ b/src/core/hle/kernel/wait_object.cpp
@@ -81,7 +81,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr<Thread> thread) {
         }
     }
 
-    size_t index = thread->GetWaitObjectIndex(this);
+    std::size_t index = thread->GetWaitObjectIndex(this);
 
     for (auto& object : thread->wait_objects)
         object->RemoveWaitingThread(thread.get());
diff --git a/src/core/hle/service/acc/acc_su.h b/src/core/hle/service/acc/acc_su.h
index a3eb885bf..fcced063a 100644
--- a/src/core/hle/service/acc/acc_su.h
+++ b/src/core/hle/service/acc/acc_su.h
@@ -6,8 +6,7 @@
 
 #include "core/hle/service/acc/acc.h"
 
-namespace Service {
-namespace Account {
+namespace Service::Account {
 
 class ACC_SU final : public Module::Interface {
 public:
@@ -16,5 +15,4 @@ public:
     ~ACC_SU() override;
 };
 
-} // namespace Account
-} // namespace Service
+} // namespace Service::Account
diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp
index 4ccebef23..0071ca613 100644
--- a/src/core/hle/service/acc/profile_manager.cpp
+++ b/src/core/hle/service/acc/profile_manager.cpp
@@ -33,7 +33,7 @@ ProfileManager::~ProfileManager() = default;
 
 /// After a users creation it needs to be "registered" to the system. AddToProfiles handles the
 /// internal management of the users profiles
-boost::optional<size_t> ProfileManager::AddToProfiles(const ProfileInfo& user) {
+boost::optional<std::size_t> ProfileManager::AddToProfiles(const ProfileInfo& user) {
     if (user_count >= MAX_USERS) {
         return boost::none;
     }
@@ -42,7 +42,7 @@ boost::optional<size_t> ProfileManager::AddToProfiles(const ProfileInfo& user) {
 }
 
 /// Deletes a specific profile based on it's profile index
-bool ProfileManager::RemoveProfileAtIndex(size_t index) {
+bool ProfileManager::RemoveProfileAtIndex(std::size_t index) {
     if (index >= MAX_USERS || index >= user_count) {
         return false;
     }
@@ -101,7 +101,7 @@ ResultCode ProfileManager::CreateNewUser(UUID uuid, const std::string& username)
 }
 
 /// Returns a users profile index based on their user id.
-boost::optional<size_t> ProfileManager::GetUserIndex(const UUID& uuid) const {
+boost::optional<std::size_t> ProfileManager::GetUserIndex(const UUID& uuid) const {
     if (!uuid) {
         return boost::none;
     }
@@ -110,16 +110,17 @@ boost::optional<size_t> ProfileManager::GetUserIndex(const UUID& uuid) const {
     if (iter == profiles.end()) {
         return boost::none;
     }
-    return static_cast<size_t>(std::distance(profiles.begin(), iter));
+    return static_cast<std::size_t>(std::distance(profiles.begin(), iter));
 }
 
 /// Returns a users profile index based on their profile
-boost::optional<size_t> ProfileManager::GetUserIndex(const ProfileInfo& user) const {
+boost::optional<std::size_t> ProfileManager::GetUserIndex(const ProfileInfo& user) const {
     return GetUserIndex(user.user_uuid);
 }
 
 /// Returns the data structure used by the switch when GetProfileBase is called on acc:*
-bool ProfileManager::GetProfileBase(boost::optional<size_t> index, ProfileBase& profile) const {
+bool ProfileManager::GetProfileBase(boost::optional<std::size_t> index,
+                                    ProfileBase& profile) const {
     if (index == boost::none || index >= MAX_USERS) {
         return false;
     }
@@ -143,14 +144,16 @@ bool ProfileManager::GetProfileBase(const ProfileInfo& user, ProfileBase& profil
 
 /// Returns the current user count on the system. We keep a variable which tracks the count so we
 /// don't have to loop the internal profile array every call.
-size_t ProfileManager::GetUserCount() const {
+
+std::size_t ProfileManager::GetUserCount() const {
     return user_count;
 }
 
 /// Lists the current "opened" users on the system. Users are typically not open until they sign
 /// into something or pick a profile. As of right now users should all be open until qlaunch is
 /// booting
-size_t ProfileManager::GetOpenUserCount() const {
+
+std::size_t ProfileManager::GetOpenUserCount() const {
     return std::count_if(profiles.begin(), profiles.end(),
                          [](const ProfileInfo& p) { return p.is_open; });
 }
@@ -206,7 +209,7 @@ UUID ProfileManager::GetLastOpenedUser() const {
 }
 
 /// Return the users profile base and the unknown arbitary data.
-bool ProfileManager::GetProfileBaseAndData(boost::optional<size_t> index, ProfileBase& profile,
+bool ProfileManager::GetProfileBaseAndData(boost::optional<std::size_t> index, ProfileBase& profile,
                                            ProfileData& data) const {
     if (GetProfileBase(index, profile)) {
         data = profiles[index.get()].data;
diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h
index cd8df93a5..bffd4cf4d 100644
--- a/src/core/hle/service/acc/profile_manager.h
+++ b/src/core/hle/service/acc/profile_manager.h
@@ -12,8 +12,8 @@
 #include "core/hle/result.h"
 
 namespace Service::Account {
-constexpr size_t MAX_USERS = 8;
-constexpr size_t MAX_DATA = 128;
+constexpr std::size_t MAX_USERS = 8;
+constexpr std::size_t MAX_DATA = 128;
 constexpr u128 INVALID_UUID{{0, 0}};
 
 struct UUID {
@@ -87,18 +87,18 @@ public:
     ResultCode AddUser(const ProfileInfo& user);
     ResultCode CreateNewUser(UUID uuid, const ProfileUsername& username);
     ResultCode CreateNewUser(UUID uuid, const std::string& username);
-    boost::optional<size_t> GetUserIndex(const UUID& uuid) const;
-    boost::optional<size_t> GetUserIndex(const ProfileInfo& user) const;
-    bool GetProfileBase(boost::optional<size_t> index, ProfileBase& profile) const;
+    boost::optional<std::size_t> GetUserIndex(const UUID& uuid) const;
+    boost::optional<std::size_t> GetUserIndex(const ProfileInfo& user) const;
+    bool GetProfileBase(boost::optional<std::size_t> index, ProfileBase& profile) const;
     bool GetProfileBase(UUID uuid, ProfileBase& profile) const;
     bool GetProfileBase(const ProfileInfo& user, ProfileBase& profile) const;
-    bool GetProfileBaseAndData(boost::optional<size_t> index, ProfileBase& profile,
+    bool GetProfileBaseAndData(boost::optional<std::size_t> index, ProfileBase& profile,
                                ProfileData& data) const;
     bool GetProfileBaseAndData(UUID uuid, ProfileBase& profile, ProfileData& data) const;
     bool GetProfileBaseAndData(const ProfileInfo& user, ProfileBase& profile,
                                ProfileData& data) const;
-    size_t GetUserCount() const;
-    size_t GetOpenUserCount() const;
+    std::size_t GetUserCount() const;
+    std::size_t GetOpenUserCount() const;
     bool UserExists(UUID uuid) const;
     void OpenUser(UUID uuid);
     void CloseUser(UUID uuid);
@@ -110,9 +110,9 @@ public:
 
 private:
     std::array<ProfileInfo, MAX_USERS> profiles{};
-    size_t user_count = 0;
-    boost::optional<size_t> AddToProfiles(const ProfileInfo& profile);
-    bool RemoveProfileAtIndex(size_t index);
+    std::size_t user_count = 0;
+    boost::optional<std::size_t> AddToProfiles(const ProfileInfo& profile);
+    bool RemoveProfileAtIndex(std::size_t index);
     UUID last_opened_user{INVALID_UUID};
 };
 
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index a57ed3042..d1f7007ec 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -456,7 +456,7 @@ private:
         IPC::RequestParser rp{ctx};
 
         const u64 offset{rp.Pop<u64>()};
-        const size_t size{ctx.GetWriteBufferSize()};
+        const std::size_t size{ctx.GetWriteBufferSize()};
 
         ASSERT(offset + size <= buffer.size());
 
diff --git a/src/core/hle/service/audio/audio.cpp b/src/core/hle/service/audio/audio.cpp
index 6b5e15633..128df7db5 100644
--- a/src/core/hle/service/audio/audio.cpp
+++ b/src/core/hle/service/audio/audio.cpp
@@ -15,6 +15,7 @@
 #include "core/hle/service/audio/audren_u.h"
 #include "core/hle/service/audio/codecctl.h"
 #include "core/hle/service/audio/hwopus.h"
+#include "core/hle/service/service.h"
 
 namespace Service::Audio {
 
diff --git a/src/core/hle/service/audio/audio.h b/src/core/hle/service/audio/audio.h
index 95e5691f7..f5bd3bf5f 100644
--- a/src/core/hle/service/audio/audio.h
+++ b/src/core/hle/service/audio/audio.h
@@ -4,7 +4,9 @@
 
 #pragma once
 
-#include "core/hle/service/service.h"
+namespace Service::SM {
+class ServiceManager;
+}
 
 namespace Service::Audio {
 
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 05100ca8f..80a002322 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -3,15 +3,20 @@
 // Refer to the license.txt file included.
 
 #include <array>
+#include <cstring>
 #include <vector>
 
+#include "audio_core/audio_out.h"
 #include "audio_core/codec.h"
+#include "common/common_funcs.h"
 #include "common/logging/log.h"
+#include "common/swap.h"
 #include "core/core.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/event.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/service/audio/audout_u.h"
+#include "core/memory.h"
 
 namespace Service::Audio {
 
@@ -25,6 +30,18 @@ enum {
 constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
 constexpr int DefaultSampleRate{48000};
 
+struct AudoutParams {
+    s32_le sample_rate;
+    u16_le channel_count;
+    INSERT_PADDING_BYTES(2);
+};
+static_assert(sizeof(AudoutParams) == 0x8, "AudoutParams is an invalid size");
+
+enum class AudioState : u32 {
+    Started,
+    Stopped,
+};
+
 class IAudioOut final : public ServiceFramework<IAudioOut> {
 public:
     IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core)
diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h
index aa52d3855..dcaf64708 100644
--- a/src/core/hle/service/audio/audout_u.h
+++ b/src/core/hle/service/audio/audout_u.h
@@ -4,27 +4,18 @@
 
 #pragma once
 
-#include "audio_core/audio_out.h"
 #include "core/hle/service/service.h"
 
+namespace AudioCore {
+class AudioOut;
+}
+
 namespace Kernel {
 class HLERequestContext;
 }
 
 namespace Service::Audio {
 
-struct AudoutParams {
-    s32_le sample_rate;
-    u16_le channel_count;
-    INSERT_PADDING_BYTES(2);
-};
-static_assert(sizeof(AudoutParams) == 0x8, "AudoutParams is an invalid size");
-
-enum class AudioState : u32 {
-    Started,
-    Stopped,
-};
-
 class IAudioOut;
 
 class AudOutU final : public ServiceFramework<AudOutU> {
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 3870bec65..e84c4fa2b 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -2,12 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <array>
+#include <memory>
 
+#include "audio_core/audio_renderer.h"
 #include "common/alignment.h"
+#include "common/common_funcs.h"
 #include "common/logging/log.h"
-#include "core/core_timing.h"
-#include "core/core_timing_util.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/event.h"
 #include "core/hle/kernel/hle_ipc.h"
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 85a995a2f..c6bc3a90a 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -4,7 +4,6 @@
 
 #pragma once
 
-#include "audio_core/audio_renderer.h"
 #include "core/hle/service/service.h"
 
 namespace Kernel {
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 341bfda42..fc6067e59 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -3,7 +3,12 @@
 // Refer to the license.txt file included.
 
 #include <cstring>
+#include <memory>
+#include <vector>
+
 #include <opus.h>
+
+#include "common/common_funcs.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/hle_ipc.h"
@@ -56,7 +61,7 @@ private:
 
     bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
                                    std::vector<opus_int16>& output) {
-        size_t raw_output_sz = output.size() * sizeof(opus_int16);
+        std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
         if (sizeof(OpusHeader) > input.size())
             return false;
         OpusHeader hdr{};
@@ -91,7 +96,7 @@ private:
     u32 channel_count;
 };
 
-static size_t WorkerBufferSize(u32 channel_count) {
+static std::size_t WorkerBufferSize(u32 channel_count) {
     ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
     return opus_decoder_get_size(static_cast<int>(channel_count));
 }
@@ -124,7 +129,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
                "Invalid sample rate");
     ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
 
-    size_t worker_sz = WorkerBufferSize(channel_count);
+    std::size_t worker_sz = WorkerBufferSize(channel_count);
     ASSERT_MSG(buffer_sz < worker_sz, "Worker buffer too large");
     std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
         static_cast<OpusDecoder*>(operator new(worker_sz))};
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index a8e0c869f..ab2f17db9 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -89,7 +89,7 @@ private:
         controller_header.left_color_body = JOYCON_BODY_NEON_BLUE;
         controller_header.left_color_buttons = JOYCON_BUTTONS_NEON_BLUE;
 
-        for (size_t controller = 0; controller < mem.controllers.size(); controller++) {
+        for (std::size_t controller = 0; controller < mem.controllers.size(); controller++) {
             for (auto& layout : mem.controllers[controller].layouts) {
                 layout.header.num_entries = HID_NUM_ENTRIES;
                 layout.header.max_entry_index = HID_NUM_ENTRIES - 1;
diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp
index 098da2a41..c89157a4d 100644
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -99,7 +99,7 @@ private:
         std::string thread;
         while (addr < end_addr) {
             const Field field{static_cast<Field>(Memory::Read8(addr++))};
-            const size_t length{Memory::Read8(addr++)};
+            const std::size_t length{Memory::Read8(addr++)};
 
             if (static_cast<Field>(Memory::Read8(addr)) == Field::Skip) {
                 ++addr;
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index da1c46d59..1069d103f 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -2,6 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+#include <cstring>
+#include <vector>
+
 #include <FontChineseSimplified.h>
 #include <FontChineseTraditional.h>
 #include <FontExtendedChineseSimplified.h>
@@ -9,14 +13,19 @@
 #include <FontNintendoExtended.h>
 #include <FontStandard.h>
 
+#include "common/assert.h"
 #include "common/common_paths.h"
+#include "common/common_types.h"
 #include "common/file_util.h"
+#include "common/logging/log.h"
+#include "common/swap.h"
 #include "core/core.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/nca_metadata.h"
 #include "core/file_sys/registered_cache.h"
 #include "core/file_sys/romfs.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/shared_memory.h"
 #include "core/hle/service/filesystem/filesystem.h"
 #include "core/hle/service/ns/pl_u.h"
 
@@ -35,49 +44,41 @@ struct FontRegion {
     u32 size;
 };
 
-static constexpr std::array<std::pair<FontArchives, const char*>, 7> SHARED_FONTS{
+constexpr std::array<std::pair<FontArchives, const char*>, 7> SHARED_FONTS{
     std::make_pair(FontArchives::Standard, "nintendo_udsg-r_std_003.bfttf"),
     std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_org_zh-cn_003.bfttf"),
     std::make_pair(FontArchives::ChineseSimple, "nintendo_udsg-r_ext_zh-cn_003.bfttf"),
     std::make_pair(FontArchives::ChineseTraditional, "nintendo_udjxh-db_zh-tw_003.bfttf"),
     std::make_pair(FontArchives::Korean, "nintendo_udsg-r_ko_003.bfttf"),
     std::make_pair(FontArchives::Extension, "nintendo_ext_003.bfttf"),
-    std::make_pair(FontArchives::Extension, "nintendo_ext2_003.bfttf")};
+    std::make_pair(FontArchives::Extension, "nintendo_ext2_003.bfttf"),
+};
 
-static constexpr std::array<const char*, 7> SHARED_FONTS_TTF{"FontStandard.ttf",
-                                                             "FontChineseSimplified.ttf",
-                                                             "FontExtendedChineseSimplified.ttf",
-                                                             "FontChineseTraditional.ttf",
-                                                             "FontKorean.ttf",
-                                                             "FontNintendoExtended.ttf",
-                                                             "FontNintendoExtended2.ttf"};
+constexpr std::array<const char*, 7> SHARED_FONTS_TTF{
+    "FontStandard.ttf",
+    "FontChineseSimplified.ttf",
+    "FontExtendedChineseSimplified.ttf",
+    "FontChineseTraditional.ttf",
+    "FontKorean.ttf",
+    "FontNintendoExtended.ttf",
+    "FontNintendoExtended2.ttf",
+};
 
 // The below data is specific to shared font data dumped from Switch on f/w 2.2
 // Virtual address and offsets/sizes likely will vary by dump
-static constexpr VAddr SHARED_FONT_MEM_VADDR{0x00000009d3016000ULL};
-static constexpr u32 EXPECTED_RESULT{
-    0x7f9a0218}; // What we expect the decrypted bfttf first 4 bytes to be
-static constexpr u32 EXPECTED_MAGIC{
-    0x36f81a1e}; // What we expect the encrypted bfttf first 4 bytes to be
-static constexpr u64 SHARED_FONT_MEM_SIZE{0x1100000};
-static constexpr FontRegion EMPTY_REGION{0, 0};
-std::vector<FontRegion>
-    SHARED_FONT_REGIONS{}; // Automatically populated based on shared_fonts dump or system archives
-
-const FontRegion& GetSharedFontRegion(size_t index) {
-    if (index >= SHARED_FONT_REGIONS.size() || SHARED_FONT_REGIONS.empty()) {
-        // No font fallback
-        return EMPTY_REGION;
-    }
-    return SHARED_FONT_REGIONS.at(index);
-}
+constexpr VAddr SHARED_FONT_MEM_VADDR{0x00000009d3016000ULL};
+constexpr u32 EXPECTED_RESULT{0x7f9a0218}; // What we expect the decrypted bfttf first 4 bytes to be
+constexpr u32 EXPECTED_MAGIC{0x36f81a1e};  // What we expect the encrypted bfttf first 4 bytes to be
+constexpr u64 SHARED_FONT_MEM_SIZE{0x1100000};
+constexpr FontRegion EMPTY_REGION{0, 0};
 
 enum class LoadState : u32 {
     Loading = 0,
     Done = 1,
 };
 
-void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, size_t& offset) {
+static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output,
+                              std::size_t& offset) {
     ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
                "Shared fonts exceeds 17mb!");
     ASSERT_MSG(input[0] == EXPECTED_MAGIC, "Failed to derive key, unexpected magic number");
@@ -94,7 +95,7 @@ void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, s
 }
 
 static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output,
-                              size_t& offset) {
+                              std::size_t& offset) {
     ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!");
     const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT;
     std::memcpy(output.data() + offset, &EXPECTED_RESULT, sizeof(u32)); // Magic header
@@ -104,28 +105,52 @@ static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& out
     offset += input.size() + (sizeof(u32) * 2);
 }
 
+// Helper function to make BuildSharedFontsRawRegions a bit nicer
 static u32 GetU32Swapped(const u8* data) {
     u32 value;
     std::memcpy(&value, data, sizeof(value));
-    return Common::swap32(value); // Helper function to make BuildSharedFontsRawRegions a bit nicer
+    return Common::swap32(value);
 }
 
-void BuildSharedFontsRawRegions(const std::vector<u8>& input) {
-    unsigned cur_offset = 0; // As we can derive the xor key we can just populate the offsets based
-                             // on the shared memory dump
-    for (size_t i = 0; i < SHARED_FONTS.size(); i++) {
-        // Out of shared fonts/Invalid font
-        if (GetU32Swapped(input.data() + cur_offset) != EXPECTED_RESULT)
-            break;
-        const u32 KEY = GetU32Swapped(input.data() + cur_offset) ^
-                        EXPECTED_MAGIC; // Derive key withing inverse xor
-        const u32 SIZE = GetU32Swapped(input.data() + cur_offset + 4) ^ KEY;
-        SHARED_FONT_REGIONS.push_back(FontRegion{cur_offset + 8, SIZE});
-        cur_offset += SIZE + 8;
+struct PL_U::Impl {
+    const FontRegion& GetSharedFontRegion(std::size_t index) const {
+        if (index >= shared_font_regions.size() || shared_font_regions.empty()) {
+            // No font fallback
+            return EMPTY_REGION;
+        }
+        return shared_font_regions.at(index);
     }
-}
 
-PL_U::PL_U() : ServiceFramework("pl:u") {
+    void BuildSharedFontsRawRegions(const std::vector<u8>& input) {
+        // As we can derive the xor key we can just populate the offsets
+        // based on the shared memory dump
+        unsigned cur_offset = 0;
+
+        for (std::size_t i = 0; i < SHARED_FONTS.size(); i++) {
+            // Out of shared fonts/invalid font
+            if (GetU32Swapped(input.data() + cur_offset) != EXPECTED_RESULT) {
+                break;
+            }
+
+            // Derive key using inverse xor
+            const u32 KEY = GetU32Swapped(input.data() + cur_offset) ^ EXPECTED_MAGIC;
+            const u32 SIZE = GetU32Swapped(input.data() + cur_offset + 4) ^ KEY;
+            shared_font_regions.push_back(FontRegion{cur_offset + 8, SIZE});
+            cur_offset += SIZE + 8;
+        }
+    }
+
+    /// Handle to shared memory region designated for a shared font
+    Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
+
+    /// Backing memory for the shared font data
+    std::shared_ptr<std::vector<u8>> shared_font;
+
+    /// Automatically populated based on shared_fonts dump or system archives.
+    std::vector<FontRegion> shared_font_regions;
+};
+
+PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
     static const FunctionInfo functions[] = {
         {0, &PL_U::RequestLoad, "RequestLoad"},
         {1, &PL_U::GetLoadState, "GetLoadState"},
@@ -137,11 +162,11 @@ PL_U::PL_U() : ServiceFramework("pl:u") {
     RegisterHandlers(functions);
     // Attempt to load shared font data from disk
     const auto nand = FileSystem::GetSystemNANDContents();
-    size_t offset = 0;
+    std::size_t offset = 0;
     // Rebuild shared fonts from data ncas
     if (nand->HasEntry(static_cast<u64>(FontArchives::Standard),
                        FileSys::ContentRecordType::Data)) {
-        shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE);
+        impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE);
         for (auto font : SHARED_FONTS) {
             const auto nca =
                 nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data);
@@ -177,12 +202,12 @@ PL_U::PL_U() : ServiceFramework("pl:u") {
                 static_cast<u32>(offset + 8),
                 static_cast<u32>((font_data_u32.size() * sizeof(u32)) -
                                  8)}; // Font offset and size do not account for the header
-            DecryptSharedFont(font_data_u32, *shared_font, offset);
-            SHARED_FONT_REGIONS.push_back(region);
+            DecryptSharedFont(font_data_u32, *impl->shared_font, offset);
+            impl->shared_font_regions.push_back(region);
         }
 
     } else {
-        shared_font = std::make_shared<std::vector<u8>>(
+        impl->shared_font = std::make_shared<std::vector<u8>>(
             SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size
 
         const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir);
@@ -206,8 +231,8 @@ PL_U::PL_U() : ServiceFramework("pl:u") {
                         static_cast<u32>(offset + 8),
                         static_cast<u32>(ttf_bytes.size())}; // Font offset and size do not account
                                                              // for the header
-                    EncryptSharedFont(ttf_bytes, *shared_font, offset);
-                    SHARED_FONT_REGIONS.push_back(region);
+                    EncryptSharedFont(ttf_bytes, *impl->shared_font, offset);
+                    impl->shared_font_regions.push_back(region);
                 } else {
                     LOG_WARNING(Service_NS, "Unable to load font: {}", font_ttf);
                 }
@@ -222,26 +247,28 @@ PL_U::PL_U() : ServiceFramework("pl:u") {
         if (file.IsOpen()) {
             // Read shared font data
             ASSERT(file.GetSize() == SHARED_FONT_MEM_SIZE);
-            file.ReadBytes(shared_font->data(), shared_font->size());
-            BuildSharedFontsRawRegions(*shared_font);
+            file.ReadBytes(impl->shared_font->data(), impl->shared_font->size());
+            impl->BuildSharedFontsRawRegions(*impl->shared_font);
         } else {
             LOG_WARNING(Service_NS,
                         "Shared Font file missing. Loading open source replacement from memory");
 
+            // clang-format off
             const std::vector<std::vector<u8>> open_source_shared_fonts_ttf = {
                 {std::begin(FontChineseSimplified), std::end(FontChineseSimplified)},
                 {std::begin(FontChineseTraditional), std::end(FontChineseTraditional)},
-                {std::begin(FontExtendedChineseSimplified),
-                 std::end(FontExtendedChineseSimplified)},
+                {std::begin(FontExtendedChineseSimplified), std::end(FontExtendedChineseSimplified)},
+                {std::begin(FontKorean), std::end(FontKorean)},
                 {std::begin(FontNintendoExtended), std::end(FontNintendoExtended)},
                 {std::begin(FontStandard), std::end(FontStandard)},
             };
+            // clang-format on
 
             for (const std::vector<u8>& font_ttf : open_source_shared_fonts_ttf) {
                 const FontRegion region{static_cast<u32>(offset + 8),
                                         static_cast<u32>(font_ttf.size())};
-                EncryptSharedFont(font_ttf, *shared_font, offset);
-                SHARED_FONT_REGIONS.push_back(region);
+                EncryptSharedFont(font_ttf, *impl->shared_font, offset);
+                impl->shared_font_regions.push_back(region);
             }
         }
     }
@@ -275,7 +302,7 @@ void PL_U::GetSize(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_NS, "called, font_id={}", font_id);
     IPC::ResponseBuilder rb{ctx, 3};
     rb.Push(RESULT_SUCCESS);
-    rb.Push<u32>(GetSharedFontRegion(font_id).size);
+    rb.Push<u32>(impl->GetSharedFontRegion(font_id).size);
 }
 
 void PL_U::GetSharedMemoryAddressOffset(Kernel::HLERequestContext& ctx) {
@@ -285,17 +312,18 @@ void PL_U::GetSharedMemoryAddressOffset(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_NS, "called, font_id={}", font_id);
     IPC::ResponseBuilder rb{ctx, 3};
     rb.Push(RESULT_SUCCESS);
-    rb.Push<u32>(GetSharedFontRegion(font_id).offset);
+    rb.Push<u32>(impl->GetSharedFontRegion(font_id).offset);
 }
 
 void PL_U::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) {
     // Map backing memory for the font data
-    Core::CurrentProcess()->vm_manager.MapMemoryBlock(
-        SHARED_FONT_MEM_VADDR, shared_font, 0, SHARED_FONT_MEM_SIZE, Kernel::MemoryState::Shared);
+    Core::CurrentProcess()->vm_manager.MapMemoryBlock(SHARED_FONT_MEM_VADDR, impl->shared_font, 0,
+                                                      SHARED_FONT_MEM_SIZE,
+                                                      Kernel::MemoryState::Shared);
 
     // Create shared font memory object
     auto& kernel = Core::System::GetInstance().Kernel();
-    shared_font_mem = Kernel::SharedMemory::Create(
+    impl->shared_font_mem = Kernel::SharedMemory::Create(
         kernel, Core::CurrentProcess(), SHARED_FONT_MEM_SIZE, Kernel::MemoryPermission::ReadWrite,
         Kernel::MemoryPermission::Read, SHARED_FONT_MEM_VADDR, Kernel::MemoryRegion::BASE,
         "PL_U:shared_font_mem");
@@ -303,7 +331,7 @@ void PL_U::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_NS, "called");
     IPC::ResponseBuilder rb{ctx, 2, 1};
     rb.Push(RESULT_SUCCESS);
-    rb.PushCopyObjects(shared_font_mem);
+    rb.PushCopyObjects(impl->shared_font_mem);
 }
 
 void PL_U::GetSharedFontInOrderOfPriority(Kernel::HLERequestContext& ctx) {
@@ -316,9 +344,9 @@ void PL_U::GetSharedFontInOrderOfPriority(Kernel::HLERequestContext& ctx) {
     std::vector<u32> font_sizes;
 
     // TODO(ogniK): Have actual priority order
-    for (size_t i = 0; i < SHARED_FONT_REGIONS.size(); i++) {
+    for (std::size_t i = 0; i < impl->shared_font_regions.size(); i++) {
         font_codes.push_back(static_cast<u32>(i));
-        auto region = GetSharedFontRegion(i);
+        auto region = impl->GetSharedFontRegion(i);
         font_offsets.push_back(region.offset);
         font_sizes.push_back(region.size);
     }
diff --git a/src/core/hle/service/ns/pl_u.h b/src/core/hle/service/ns/pl_u.h
index 296c3db05..253f26a2a 100644
--- a/src/core/hle/service/ns/pl_u.h
+++ b/src/core/hle/service/ns/pl_u.h
@@ -5,7 +5,6 @@
 #pragma once
 
 #include <memory>
-#include "core/hle/kernel/shared_memory.h"
 #include "core/hle/service/service.h"
 
 namespace Service::NS {
@@ -23,11 +22,8 @@ private:
     void GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx);
     void GetSharedFontInOrderOfPriority(Kernel::HLERequestContext& ctx);
 
-    /// Handle to shared memory region designated for a shared font
-    Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
-
-    /// Backing memory for the shared font data
-    std::shared_ptr<std::vector<u8>> shared_font;
+    struct Impl;
+    std::unique_ptr<Impl> impl;
 };
 
 } // namespace Service::NS
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 25d5a93fa..d8b8037a8 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -71,7 +71,7 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
 }
 
 u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) {
-    size_t num_entries = input.size() / sizeof(IoctlRemapEntry);
+    std::size_t num_entries = input.size() / sizeof(IoctlRemapEntry);
 
     LOG_WARNING(Service_NVDRV, "(STUBBED) called, num_entries=0x{:X}", num_entries);
 
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 34f98fe5a..fd98d541d 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -9,8 +9,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 
-namespace Service {
-namespace NVFlinger {
+namespace Service::NVFlinger {
 
 BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) {
     auto& kernel = Core::System::GetInstance().Kernel();
@@ -104,5 +103,4 @@ u32 BufferQueue::Query(QueryType type) {
     return 0;
 }
 
-} // namespace NVFlinger
-} // namespace Service
+} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index 17c81928a..50b767732 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -15,8 +15,7 @@ namespace CoreTiming {
 struct EventType;
 }
 
-namespace Service {
-namespace NVFlinger {
+namespace Service::NVFlinger {
 
 struct IGBPBuffer {
     u32_le magic;
@@ -98,5 +97,4 @@ private:
     Kernel::SharedPtr<Kernel::Event> buffer_wait_event;
 };
 
-} // namespace NVFlinger
-} // namespace Service
+} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 7455ddd19..d47b6f659 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -23,7 +23,7 @@
 
 namespace Service::NVFlinger {
 
-constexpr size_t SCREEN_REFRESH_RATE = 60;
+constexpr std::size_t SCREEN_REFRESH_RATE = 60;
 constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
 
 NVFlinger::NVFlinger() {
diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp
index 3c43b8d8c..6a9eccfb5 100644
--- a/src/core/hle/service/prepo/prepo.cpp
+++ b/src/core/hle/service/prepo/prepo.cpp
@@ -1,36 +1,47 @@
-#include <cinttypes>
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/event.h"
 #include "core/hle/service/prepo/prepo.h"
+#include "core/hle/service/service.h"
 
 namespace Service::PlayReport {
-PlayReport::PlayReport(const char* name) : ServiceFramework(name) {
-    static const FunctionInfo functions[] = {
-        {10100, nullptr, "SaveReport"},
-        {10101, &PlayReport::SaveReportWithUser, "SaveReportWithUser"},
-        {10200, nullptr, "RequestImmediateTransmission"},
-        {10300, nullptr, "GetTransmissionStatus"},
-        {20100, nullptr, "SaveSystemReport"},
-        {20200, nullptr, "SetOperationMode"},
-        {20101, nullptr, "SaveSystemReportWithUser"},
-        {30100, nullptr, "ClearStorage"},
-        {40100, nullptr, "IsUserAgreementCheckEnabled"},
-        {40101, nullptr, "SetUserAgreementCheckEnabled"},
-        {90100, nullptr, "GetStorageUsage"},
-        {90200, nullptr, "GetStatistics"},
-        {90201, nullptr, "GetThroughputHistory"},
-        {90300, nullptr, "GetLastUploadError"},
-    };
-    RegisterHandlers(functions);
-};
 
-void PlayReport::SaveReportWithUser(Kernel::HLERequestContext& ctx) {
-    // TODO(ogniK): Do we want to add play report?
-    LOG_WARNING(Service_PREPO, "(STUBBED) called");
+class PlayReport final : public ServiceFramework<PlayReport> {
+public:
+    explicit PlayReport(const char* name) : ServiceFramework{name} {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {10100, nullptr, "SaveReport"},
+            {10101, &PlayReport::SaveReportWithUser, "SaveReportWithUser"},
+            {10200, nullptr, "RequestImmediateTransmission"},
+            {10300, nullptr, "GetTransmissionStatus"},
+            {20100, nullptr, "SaveSystemReport"},
+            {20200, nullptr, "SetOperationMode"},
+            {20101, nullptr, "SaveSystemReportWithUser"},
+            {30100, nullptr, "ClearStorage"},
+            {40100, nullptr, "IsUserAgreementCheckEnabled"},
+            {40101, nullptr, "SetUserAgreementCheckEnabled"},
+            {90100, nullptr, "GetStorageUsage"},
+            {90200, nullptr, "GetStatistics"},
+            {90201, nullptr, "GetThroughputHistory"},
+            {90300, nullptr, "GetLastUploadError"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+
+private:
+    void SaveReportWithUser(Kernel::HLERequestContext& ctx) {
+        // TODO(ogniK): Do we want to add play report?
+        LOG_WARNING(Service_PREPO, "(STUBBED) called");
 
-    IPC::ResponseBuilder rb{ctx, 2};
-    rb.Push(RESULT_SUCCESS);
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
 };
 
 void InstallInterfaces(SM::ServiceManager& service_manager) {
diff --git a/src/core/hle/service/prepo/prepo.h b/src/core/hle/service/prepo/prepo.h
index f5a6aba6d..0e7b01331 100644
--- a/src/core/hle/service/prepo/prepo.h
+++ b/src/core/hle/service/prepo/prepo.h
@@ -4,22 +4,12 @@
 
 #pragma once
 
-#include <memory>
-#include <string>
-#include "core/hle/kernel/event.h"
-#include "core/hle/service/service.h"
+namespace Service::SM {
+class ServiceManager;
+}
 
 namespace Service::PlayReport {
 
-class PlayReport final : public ServiceFramework<PlayReport> {
-public:
-    explicit PlayReport(const char* name);
-    ~PlayReport() = default;
-
-private:
-    void SaveReportWithUser(Kernel::HLERequestContext& ctx);
-};
-
 void InstallInterfaces(SM::ServiceManager& service_manager);
 
 } // namespace Service::PlayReport
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 9bb7c7b26..62f049660 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -50,6 +50,7 @@
 #include "core/hle/service/nim/nim.h"
 #include "core/hle/service/ns/ns.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
+#include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/hle/service/pcie/pcie.h"
 #include "core/hle/service/pctl/pctl.h"
 #include "core/hle/service/pcv/pcv.h"
@@ -58,7 +59,6 @@
 #include "core/hle/service/psc/psc.h"
 #include "core/hle/service/service.h"
 #include "core/hle/service/set/settings.h"
-#include "core/hle/service/sm/controller.h"
 #include "core/hle/service/sm/sm.h"
 #include "core/hle/service/sockets/sockets.h"
 #include "core/hle/service/spl/module.h"
@@ -129,9 +129,9 @@ Kernel::SharedPtr<Kernel::ClientPort> ServiceFrameworkBase::CreatePort() {
     return client_port;
 }
 
-void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, size_t n) {
+void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) {
     handlers.reserve(handlers.size() + n);
-    for (size_t i = 0; i < n; ++i) {
+    for (std::size_t i = 0; i < n; ++i) {
         // Usually this array is sorted by id already, so hint to insert at the end
         handlers.emplace_hint(handlers.cend(), functions[i].expected_header, functions[i]);
     }
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 7a051523e..2fc57a82e 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -88,7 +88,7 @@ private:
     ServiceFrameworkBase(const char* service_name, u32 max_sessions, InvokerFn* handler_invoker);
     ~ServiceFrameworkBase();
 
-    void RegisterHandlersBase(const FunctionInfoBase* functions, size_t n);
+    void RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n);
     void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info);
 
     /// Identifier string used to connect to the service.
@@ -152,7 +152,7 @@ protected:
         : ServiceFrameworkBase(service_name, max_sessions, Invoker) {}
 
     /// Registers handlers in the service.
-    template <size_t N>
+    template <std::size_t N>
     void RegisterHandlers(const FunctionInfo (&functions)[N]) {
         RegisterHandlers(functions, N);
     }
@@ -161,7 +161,7 @@ protected:
      * Registers handlers in the service. Usually prefer using the other RegisterHandlers
      * overload in order to avoid needing to specify the array size.
      */
-    void RegisterHandlers(const FunctionInfo* functions, size_t n) {
+    void RegisterHandlers(const FunctionInfo* functions, std::size_t n) {
         RegisterHandlersBase(functions, n);
     }
 
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index 59eb20155..9e5af7839 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -32,21 +32,21 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{
     LanguageCode::ZH_HANT,
 }};
 
-constexpr size_t pre4_0_0_max_entries = 0xF;
-constexpr size_t post4_0_0_max_entries = 0x40;
+constexpr std::size_t pre4_0_0_max_entries = 0xF;
+constexpr std::size_t post4_0_0_max_entries = 0x40;
 
-LanguageCode GetLanguageCodeFromIndex(size_t index) {
+LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
     return available_language_codes.at(index);
 }
 
-template <size_t size>
+template <std::size_t size>
 static std::array<LanguageCode, size> MakeLanguageCodeSubset() {
     std::array<LanguageCode, size> arr;
     std::copy_n(available_language_codes.begin(), size, arr.begin());
     return arr;
 }
 
-static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, size_t max_size) {
+static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t max_size) {
     IPC::ResponseBuilder rb{ctx, 3};
     rb.Push(RESULT_SUCCESS);
     if (available_language_codes.size() > max_size)
diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h
index 5f0214359..266f13e46 100644
--- a/src/core/hle/service/set/set.h
+++ b/src/core/hle/service/set/set.h
@@ -28,7 +28,7 @@ enum class LanguageCode : u64 {
     ZH_HANS = 0x00736E61482D687A,
     ZH_HANT = 0x00746E61482D687A,
 };
-LanguageCode GetLanguageCodeFromIndex(size_t idx);
+LanguageCode GetLanguageCodeFromIndex(std::size_t idx);
 
 class SET final : public ServiceFramework<SET> {
 public:
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 18d1641b8..096f0fd52 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -15,6 +15,10 @@
 
 namespace Service::SM {
 
+constexpr ResultCode ERR_ALREADY_REGISTERED(ErrorModule::SM, 4);
+constexpr ResultCode ERR_INVALID_NAME(ErrorModule::SM, 6);
+constexpr ResultCode ERR_SERVICE_NOT_REGISTERED(ErrorModule::SM, 7);
+
 ServiceManager::ServiceManager() = default;
 ServiceManager::~ServiceManager() = default;
 
@@ -24,10 +28,10 @@ void ServiceManager::InvokeControlRequest(Kernel::HLERequestContext& context) {
 
 static ResultCode ValidateServiceName(const std::string& name) {
     if (name.size() <= 0 || name.size() > 8) {
-        return ERR_INVALID_NAME_SIZE;
+        return ERR_INVALID_NAME;
     }
     if (name.find('\0') != std::string::npos) {
-        return ERR_NAME_CONTAINS_NUL;
+        return ERR_INVALID_NAME;
     }
     return RESULT_SUCCESS;
 }
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index a58d922a0..da2c51082 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -36,12 +36,6 @@ private:
     std::shared_ptr<ServiceManager> service_manager;
 };
 
-constexpr ResultCode ERR_SERVICE_NOT_REGISTERED(-1);
-constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED(-1);
-constexpr ResultCode ERR_INVALID_NAME_SIZE(-1);
-constexpr ResultCode ERR_NAME_CONTAINS_NUL(-1);
-constexpr ResultCode ERR_ALREADY_REGISTERED(-1);
-
 class ServiceManager {
 public:
     static void InstallInterfaces(std::shared_ptr<ServiceManager> self);
diff --git a/src/core/hle/service/spl/module.cpp b/src/core/hle/service/spl/module.cpp
index 0d8441fb1..44a6717d0 100644
--- a/src/core/hle/service/spl/module.cpp
+++ b/src/core/hle/service/spl/module.cpp
@@ -21,7 +21,7 @@ Module::Interface::~Interface() = default;
 void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
 
-    size_t size = ctx.GetWriteBufferSize();
+    std::size_t size = ctx.GetWriteBufferSize();
 
     std::vector<u8> data(size);
     std::generate(data.begin(), data.end(), std::rand);
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 85244ac3b..d0cde5ede 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -4,25 +4,28 @@
 
 #include <algorithm>
 #include <array>
+#include <cstring>
 #include <memory>
 #include <type_traits>
 #include <utility>
 #include <boost/optional.hpp>
 #include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_funcs.h"
+#include "common/logging/log.h"
 #include "common/math_util.h"
-#include "common/scope_exit.h"
+#include "common/swap.h"
 #include "core/core_timing.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/event.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
+#include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_m.h"
 #include "core/hle/service/vi/vi_s.h"
 #include "core/hle/service/vi/vi_u.h"
 #include "core/settings.h"
-#include "video_core/renderer_base.h"
-#include "video_core/video_core.h"
 
 namespace Service::VI {
 
@@ -38,7 +41,7 @@ static_assert(sizeof(DisplayInfo) == 0x60, "DisplayInfo has wrong size");
 class Parcel {
 public:
     // This default size was chosen arbitrarily.
-    static constexpr size_t DefaultBufferSize = 0x40;
+    static constexpr std::size_t DefaultBufferSize = 0x40;
     Parcel() : buffer(DefaultBufferSize) {}
     explicit Parcel(std::vector<u8> data) : buffer(std::move(data)) {}
     virtual ~Parcel() = default;
@@ -66,7 +69,7 @@ public:
         return val;
     }
 
-    std::vector<u8> ReadBlock(size_t length) {
+    std::vector<u8> ReadBlock(std::size_t length) {
         ASSERT(read_index + length <= buffer.size());
         const u8* const begin = buffer.data() + read_index;
         const u8* const end = begin + length;
@@ -156,8 +159,8 @@ private:
     static_assert(sizeof(Header) == 16, "ParcelHeader has wrong size");
 
     std::vector<u8> buffer;
-    size_t read_index = 0;
-    size_t write_index = 0;
+    std::size_t read_index = 0;
+    std::size_t write_index = 0;
 };
 
 class NativeWindow : public Parcel {
@@ -514,7 +517,7 @@ private:
                 ctx.SleepClientThread(
                     Kernel::GetCurrentThread(), "IHOSBinderDriver::DequeueBuffer", -1,
                     [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
-                        ThreadWakeupReason reason) {
+                        Kernel::ThreadWakeupReason reason) {
                         // Repeat TransactParcel DequeueBuffer when a buffer is available
                         auto buffer_queue = nv_flinger->GetBufferQueue(id);
                         boost::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h
index c2dc83605..e3963502a 100644
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,11 +4,10 @@
 
 #pragma once
 
-#include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/hle/service/service.h"
 
-namespace CoreTiming {
-struct EventType;
+namespace Service::NVFlinger {
+class NVFlinger;
 }
 
 namespace Service::VI {
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 120e1e133..0e2af20b4 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -300,7 +300,7 @@ SharedPtr<CodeSet> ElfReader::LoadInto(u32 vaddr) {
     }
 
     std::vector<u8> program_image(total_image_size);
-    size_t current_image_position = 0;
+    std::size_t current_image_position = 0;
 
     auto& kernel = Core::System::GetInstance().Kernel();
     SharedPtr<CodeSet> codeset = CodeSet::Create(kernel, "");
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index fa43a2650..f2a183ba1 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -155,7 +155,7 @@ constexpr std::array<const char*, 58> RESULT_MESSAGES{
 };
 
 std::ostream& operator<<(std::ostream& os, ResultStatus status) {
-    os << RESULT_MESSAGES.at(static_cast<size_t>(status));
+    os << RESULT_MESSAGES.at(static_cast<std::size_t>(status));
     return os;
 }
 
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index bb89a9da3..c49ec34ab 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -191,7 +191,7 @@ ResultStatus AppLoader_NRO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
     process->svc_access_mask.set();
     process->resource_limit =
         kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
-    process->Run(base_addr, THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
+    process->Run(base_addr, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
 
     is_loaded = true;
     return ResultStatus::Success;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 082a95d40..3c6306818 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -157,7 +157,8 @@ ResultStatus AppLoader_NSO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
     process->svc_access_mask.set();
     process->resource_limit =
         kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
-    process->Run(Memory::PROCESS_IMAGE_VADDR, THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
+    process->Run(Memory::PROCESS_IMAGE_VADDR, Kernel::THREADPRIO_DEFAULT,
+                 Memory::DEFAULT_STACK_SIZE);
 
     is_loaded = true;
     return ResultStatus::Success;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 0e4e0157c..316b46820 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -370,16 +370,16 @@ u64 Read64(const VAddr addr) {
 }
 
 void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
-               const size_t size) {
+               const std::size_t size) {
     auto& page_table = process.vm_manager.page_table;
 
-    size_t remaining_size = size;
-    size_t page_index = src_addr >> PAGE_BITS;
-    size_t page_offset = src_addr & PAGE_MASK;
+    std::size_t remaining_size = size;
+    std::size_t page_index = src_addr >> PAGE_BITS;
+    std::size_t page_offset = src_addr & PAGE_MASK;
 
     while (remaining_size > 0) {
-        const size_t copy_amount =
-            std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size);
+        const std::size_t copy_amount =
+            std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
@@ -414,7 +414,7 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
     }
 }
 
-void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) {
+void ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
     ReadBlock(*Core::CurrentProcess(), src_addr, dest_buffer, size);
 }
 
@@ -435,15 +435,15 @@ void Write64(const VAddr addr, const u64 data) {
 }
 
 void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
-                const size_t size) {
+                const std::size_t size) {
     auto& page_table = process.vm_manager.page_table;
-    size_t remaining_size = size;
-    size_t page_index = dest_addr >> PAGE_BITS;
-    size_t page_offset = dest_addr & PAGE_MASK;
+    std::size_t remaining_size = size;
+    std::size_t page_index = dest_addr >> PAGE_BITS;
+    std::size_t page_offset = dest_addr & PAGE_MASK;
 
     while (remaining_size > 0) {
-        const size_t copy_amount =
-            std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size);
+        const std::size_t copy_amount =
+            std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
@@ -477,19 +477,19 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
     }
 }
 
-void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size) {
+void WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
     WriteBlock(*Core::CurrentProcess(), dest_addr, src_buffer, size);
 }
 
-void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size_t size) {
+void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
     auto& page_table = process.vm_manager.page_table;
-    size_t remaining_size = size;
-    size_t page_index = dest_addr >> PAGE_BITS;
-    size_t page_offset = dest_addr & PAGE_MASK;
+    std::size_t remaining_size = size;
+    std::size_t page_index = dest_addr >> PAGE_BITS;
+    std::size_t page_offset = dest_addr & PAGE_MASK;
 
     while (remaining_size > 0) {
-        const size_t copy_amount =
-            std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size);
+        const std::size_t copy_amount =
+            std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
@@ -522,15 +522,16 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size
     }
 }
 
-void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr, const size_t size) {
+void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
+               const std::size_t size) {
     auto& page_table = process.vm_manager.page_table;
-    size_t remaining_size = size;
-    size_t page_index = src_addr >> PAGE_BITS;
-    size_t page_offset = src_addr & PAGE_MASK;
+    std::size_t remaining_size = size;
+    std::size_t page_index = src_addr >> PAGE_BITS;
+    std::size_t page_offset = src_addr & PAGE_MASK;
 
     while (remaining_size > 0) {
-        const size_t copy_amount =
-            std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size);
+        const std::size_t copy_amount =
+            std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
         const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
 
         switch (page_table.attributes[page_index]) {
@@ -565,7 +566,7 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
     }
 }
 
-void CopyBlock(VAddr dest_addr, VAddr src_addr, size_t size) {
+void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) {
     CopyBlock(*Core::CurrentProcess(), dest_addr, src_addr, size);
 }
 
diff --git a/src/core/memory.h b/src/core/memory.h
index f06e04a75..2a27c0251 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -22,11 +22,11 @@ namespace Memory {
  * Page size used by the ARM architecture. This is the smallest granularity with which memory can
  * be mapped.
  */
-constexpr size_t PAGE_BITS = 12;
+constexpr std::size_t PAGE_BITS = 12;
 constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
 constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
-constexpr size_t ADDRESS_SPACE_BITS = 36;
-constexpr size_t PAGE_TABLE_NUM_ENTRIES = 1ULL << (ADDRESS_SPACE_BITS - PAGE_BITS);
+constexpr std::size_t ADDRESS_SPACE_BITS = 36;
+constexpr std::size_t PAGE_TABLE_NUM_ENTRIES = 1ULL << (ADDRESS_SPACE_BITS - PAGE_BITS);
 
 enum class PageType : u8 {
     /// Page is unmapped and should cause an access error.
@@ -154,13 +154,13 @@ void Write16(VAddr addr, u16 data);
 void Write32(VAddr addr, u32 data);
 void Write64(VAddr addr, u64 data);
 
-void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer, size_t size);
-void ReadBlock(VAddr src_addr, void* dest_buffer, size_t size);
+void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer, std::size_t size);
+void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
 void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
-                size_t size);
-void WriteBlock(VAddr dest_addr, const void* src_buffer, size_t size);
-void ZeroBlock(const Kernel::Process& process, VAddr dest_addr, size_t size);
-void CopyBlock(VAddr dest_addr, VAddr src_addr, size_t size);
+                std::size_t size);
+void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
+void ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size);
+void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
 
 u8* GetPointer(VAddr vaddr);
 
diff --git a/src/core/memory_hook.h b/src/core/memory_hook.h
index e8ea19333..0269c7ff1 100644
--- a/src/core/memory_hook.h
+++ b/src/core/memory_hook.h
@@ -32,14 +32,14 @@ public:
     virtual boost::optional<u32> Read32(VAddr addr) = 0;
     virtual boost::optional<u64> Read64(VAddr addr) = 0;
 
-    virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, size_t size) = 0;
+    virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) = 0;
 
     virtual bool Write8(VAddr addr, u8 data) = 0;
     virtual bool Write16(VAddr addr, u16 data) = 0;
     virtual bool Write32(VAddr addr, u32 data) = 0;
     virtual bool Write64(VAddr addr, u64 data) = 0;
 
-    virtual bool WriteBlock(VAddr dest_addr, const void* src_buffer, size_t size) = 0;
+    virtual bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) = 0;
 };
 
 using MemoryHookPointer = std::shared_ptr<MemoryHook>;
diff --git a/src/core/settings.h b/src/core/settings.h
index 08a16ef2c..0318d019c 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -148,6 +148,7 @@ struct Values {
 
     // Audio
     std::string sink_id;
+    bool enable_audio_stretching;
     std::string audio_device_id;
     float volume;
 
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 3730e85b8..b0df154ca 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -120,6 +120,9 @@ TelemetrySession::TelemetrySession() {
     Telemetry::AppendOSInfo(field_collection);
 
     // Log user configuration information
+    AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
+    AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
+             Settings::values.enable_audio_stretching);
     AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit);
     AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
              Settings::values.use_multi_core);
diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp
index af032f0c9..73cacb47f 100644
--- a/src/core/tracer/recorder.cpp
+++ b/src/core/tracer/recorder.cpp
@@ -76,7 +76,7 @@ void Recorder::Finish(const std::string& filename) {
     try {
         // Open file and write header
         FileUtil::IOFile file(filename, "wb");
-        size_t written = file.WriteObject(header);
+        std::size_t written = file.WriteObject(header);
         if (written != 1 || file.Tell() != initial.gpu_registers)
             throw "Failed to write header";
 
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 4d74bb395..37f09ce5f 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,16 +1,15 @@
 add_executable(tests
     common/param_package.cpp
+    common/ring_buffer.cpp
     core/arm/arm_test_common.cpp
     core/arm/arm_test_common.h
     core/core_timing.cpp
-    glad.cpp
     tests.cpp
 )
 
 create_target_directory_groups(tests)
 
 target_link_libraries(tests PRIVATE common core)
-target_link_libraries(tests PRIVATE glad) # To support linker work-around
 target_link_libraries(tests PRIVATE ${PLATFORM_LIBRARIES} catch-single-include Threads::Threads)
 
 add_test(NAME tests COMMAND tests)
diff --git a/src/tests/common/ring_buffer.cpp b/src/tests/common/ring_buffer.cpp
new file mode 100644
index 000000000..c883c4d56
--- /dev/null
+++ b/src/tests/common/ring_buffer.cpp
@@ -0,0 +1,130 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <numeric>
+#include <thread>
+#include <vector>
+#include <catch2/catch.hpp>
+#include "common/ring_buffer.h"
+
+namespace Common {
+
+TEST_CASE("RingBuffer: Basic Tests", "[common]") {
+    RingBuffer<char, 4, 1> buf; // the checks below expect room for exactly 4 chars
+
+    // Pushing values into a ring buffer with space should succeed.
+    for (std::size_t i = 0; i < 4; i++) {
+        const char elem = static_cast<char>(i);
+        const std::size_t count = buf.Push(&elem, 1);
+        REQUIRE(count == 1);
+    }
+
+    REQUIRE(buf.Size() == 4);
+
+    // Pushing values into a full ring buffer should fail (nothing stored, size unchanged).
+    {
+        const char elem = static_cast<char>(42);
+        const std::size_t count = buf.Push(&elem, 1);
+        REQUIRE(count == 0);
+    }
+
+    REQUIRE(buf.Size() == 4);
+
+    // Popping multiple values from a ring buffer with values should succeed, oldest first.
+    {
+        const std::vector<char> popped = buf.Pop(2);
+        REQUIRE(popped.size() == 2);
+        REQUIRE(popped[0] == 0);
+        REQUIRE(popped[1] == 1);
+    }
+
+    REQUIRE(buf.Size() == 2);
+
+    // Popping a single value from a ring buffer with values should succeed.
+    {
+        const std::vector<char> popped = buf.Pop(1);
+        REQUIRE(popped.size() == 1);
+        REQUIRE(popped[0] == 2);
+    }
+
+    REQUIRE(buf.Size() == 1);
+
+    // Pushing more values than space available should partially succeed.
+    {
+        std::vector<char> to_push(6);
+        std::iota(to_push.begin(), to_push.end(), 88); // 88, 89, ..., 93
+        const std::size_t count = buf.Push(to_push);
+        REQUIRE(count == 3); // one value is still queued, so only 3 of the 6 fit
+    }
+
+    REQUIRE(buf.Size() == 4);
+
+    // Doing an unlimited pop should pop all values.
+    {
+        const std::vector<char> popped = buf.Pop();
+        REQUIRE(popped.size() == 4);
+        REQUIRE(popped[0] == 3); // last of the four values pushed at the start
+        REQUIRE(popped[1] == 88);
+        REQUIRE(popped[2] == 89);
+        REQUIRE(popped[3] == 90);
+    }
+
+    REQUIRE(buf.Size() == 0);
+}
+
+TEST_CASE("RingBuffer: Threaded Test", "[common]") {
+    RingBuffer<char, 4, 2> buf; // each Pop(1) below is expected to yield 2 chars
+    const char seed = 42;
+    const std::size_t count = 1000000; // number of successful pushes and of successful pops
+    std::size_t full = 0;  // failed push attempts; written only by the producer thread
+    std::size_t empty = 0; // failed pop attempts; written only by the consumer thread
+
+    const auto next_value = [](std::array<char, 2>& value) {
+        value[0] += 1; // the two chars advance by different steps
+        value[1] += 2;
+    };
+
+    std::thread producer{[&] {
+        std::array<char, 2> value = {seed, seed};
+        std::size_t i = 0;
+        while (i < count) {
+            if (const std::size_t c = buf.Push(&value[0], 1); c > 0) {
+                REQUIRE(c == 1); // NOTE(review): runs off the main thread — confirm Catch2 allows it
+                i++;
+                next_value(value);
+            } else {
+                full++;
+                std::this_thread::yield();
+            }
+        }
+    }};
+
+    std::thread consumer{[&] {
+        std::array<char, 2> value = {seed, seed}; // regenerates the producer's sequence locally
+        std::size_t i = 0;
+        while (i < count) {
+            if (const std::vector<char> v = buf.Pop(1); v.size() > 0) {
+                REQUIRE(v.size() == 2);
+                REQUIRE(v[0] == value[0]); // values must arrive in the order they were pushed
+                REQUIRE(v[1] == value[1]);
+                i++;
+                next_value(value);
+            } else {
+                empty++;
+                std::this_thread::yield();
+            }
+        }
+    }};
+
+    producer.join();
+    consumer.join();
+    // NOTE(review): printf is used below, but this file does not include <cstdio> directly.
+    REQUIRE(buf.Size() == 0);
+    printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty);
+}
+
+} // namespace Common
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 038d57b3a..7c69fc26e 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -87,11 +87,11 @@ boost::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) {
     return *Read32(addr) | static_cast<u64>(*Read32(addr + 4)) << 32;
 }
 
-bool TestEnvironment::TestMemory::ReadBlock(VAddr src_addr, void* dest_buffer, size_t size) {
+bool TestEnvironment::TestMemory::ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) {
     VAddr addr = src_addr;
     u8* data = static_cast<u8*>(dest_buffer);
 
-    for (size_t i = 0; i < size; i++, addr++, data++) {
+    for (std::size_t i = 0; i < size; i++, addr++, data++) {
         *data = *Read8(addr);
     }
 
@@ -126,11 +126,12 @@ bool TestEnvironment::TestMemory::Write64(VAddr addr, u64 data) {
     return true;
 }
 
-bool TestEnvironment::TestMemory::WriteBlock(VAddr dest_addr, const void* src_buffer, size_t size) {
+bool TestEnvironment::TestMemory::WriteBlock(VAddr dest_addr, const void* src_buffer,
+                                             std::size_t size) {
     VAddr addr = dest_addr;
     const u8* data = static_cast<const u8*>(src_buffer);
 
-    for (size_t i = 0; i < size; i++, addr++, data++) {
+    for (std::size_t i = 0; i < size; i++, addr++, data++) {
         env->write_records.emplace_back(8, addr, *data);
         if (env->mutable_memory)
             env->SetMemory8(addr, *data);
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index e4b6df194..5de8dab4e 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -19,8 +19,8 @@ struct PageTable;
 namespace ArmTests {
 
 struct WriteRecord {
-    WriteRecord(size_t size, VAddr addr, u64 data) : size(size), addr(addr), data(data) {}
-    size_t size;
+    WriteRecord(std::size_t size, VAddr addr, u64 data) : size(size), addr(addr), data(data) {}
+    std::size_t size;
     VAddr addr;
     u64 data;
     bool operator==(const WriteRecord& o) const {
@@ -71,14 +71,14 @@ private:
         boost::optional<u32> Read32(VAddr addr) override;
         boost::optional<u64> Read64(VAddr addr) override;
 
-        bool ReadBlock(VAddr src_addr, void* dest_buffer, size_t size) override;
+        bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) override;
 
         bool Write8(VAddr addr, u8 data) override;
         bool Write16(VAddr addr, u16 data) override;
         bool Write32(VAddr addr, u32 data) override;
         bool Write64(VAddr addr, u64 data) override;
 
-        bool WriteBlock(VAddr dest_addr, const void* src_buffer, size_t size) override;
+        bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) override;
 
         std::unordered_map<VAddr, u8> data;
     };
diff --git a/src/tests/glad.cpp b/src/tests/glad.cpp
deleted file mode 100644
index 1797c0e3d..000000000
--- a/src/tests/glad.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2016 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <catch2/catch.hpp>
-#include <glad/glad.h>
-
-// This is not an actual test, but a work-around for issue #2183.
-// If tests uses functions in core but doesn't explicitly use functions in glad, the linker of macOS
-// will error about undefined references from video_core to glad. So we explicitly use a glad
-// function here to shut up the linker.
-TEST_CASE("glad fake test", "[dummy]") {
-    REQUIRE(&gladLoadGL != nullptr);
-}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index ac4ec36de..f5ae57039 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,6 +5,8 @@ add_library(video_core STATIC
     debug_utils/debug_utils.h
     engines/fermi_2d.cpp
     engines/fermi_2d.h
+    engines/kepler_memory.cpp
+    engines/kepler_memory.h
     engines/maxwell_3d.cpp
     engines/maxwell_3d.h
     engines/maxwell_compute.cpp
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 2625ddfdc..f1aa6091b 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -14,6 +14,7 @@
 #include "core/tracer/recorder.h"
 #include "video_core/command_processor.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_compute.h"
 #include "video_core/engines/maxwell_dma.h"
@@ -69,6 +70,9 @@ void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
         case EngineID::MAXWELL_DMA_COPY_A:
             maxwell_dma->WriteReg(method, value);
             break;
+        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+            kepler_memory->WriteReg(method, value);
+            break;
         default:
             UNIMPLEMENTED_MSG("Unimplemented engine");
         }
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index dcf9ef8b9..021b83eaa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -26,7 +26,7 @@ public:
     void WriteReg(u32 method, u32 value);
 
     struct Regs {
-        static constexpr size_t NUM_REGS = 0x258;
+        static constexpr std::size_t NUM_REGS = 0x258;
 
         struct Surface {
             RenderTargetFormat format;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
new file mode 100644
index 000000000..66ae6332d
--- /dev/null
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -0,0 +1,53 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_memory.h"
+
+namespace Tegra::Engines {
+
+KeplerMemory::KeplerMemory(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+KeplerMemory::~KeplerMemory() = default;
+
+/// Stores value in the register selected by method, then handles the two registers that have
+/// side effects: writing exec restarts the upload at word 0, writing data uploads one word.
+void KeplerMemory::WriteReg(u32 method, u32 value) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid KeplerMemory register, increase the size of the Regs structure");
+
+    regs.reg_array[method] = value;
+
+    switch (method) {
+    case KEPLERMEMORY_REG_INDEX(exec): {
+        // Rewind so the next data word is written at the start of the destination.
+        state.write_offset = 0;
+        break;
+    }
+    case KEPLERMEMORY_REG_INDEX(data): {
+        ProcessData(value);
+        break;
+    }
+    }
+}
+
+/// Writes data to the destination address plus the current word offset, then advances the
+/// offset by one word. Only linear uploads with a zero x/y/z origin are handled.
+void KeplerMemory::ProcessData(u32 data) {
+    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
+    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
+
+    const GPUVAddr address = regs.dest.Address() + state.write_offset * sizeof(u32);
+
+    // The translation yields an optional; check it before dereferencing so that an unmapped
+    // destination is reported instead of reading an empty optional.
+    const auto dest_address = memory_manager.GpuToCpuAddress(address);
+    ASSERT_MSG(dest_address, "Invalid GPU address");
+
+    Memory::Write32(*dest_address, data);
+
+    state.write_offset++;
+}
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
new file mode 100644
index 000000000..b0d0078cf
--- /dev/null
+++ b/src/video_core/engines/kepler_memory.h
@@ -0,0 +1,100 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Engines {
+
+/// Index of a Regs field within reg_array (its byte offset divided by the register size).
+#define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
+    (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
+
+/// Engine that writes each word sent to its data register to the memory addressed by dest.
+class KeplerMemory final {
+public:
+    explicit KeplerMemory(MemoryManager& memory_manager);
+    ~KeplerMemory();
+
+    /// Write the value to the register identified by method.
+    void WriteReg(u32 method, u32 value);
+
+    struct Regs {
+        static constexpr std::size_t NUM_REGS = 0x7F;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0x60);
+
+                u32 line_length_in;
+                u32 line_count;
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 pitch;
+                    u32 block_dimensions;
+                    u32 width;
+                    u32 height;
+                    u32 depth;
+                    u32 z;
+                    u32 x;
+                    u32 y;
+
+                    /// Joins address_high (bits 32 and up) and address_low into one GPUVAddr.
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } dest;
+
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec;
+
+                u32 data;
+
+                INSERT_PADDING_WORDS(0x11);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    struct {
+        /// Word index of the next data write; reset whenever exec is written.
+        u32 write_offset = 0;
+    } state{};
+
+private:
+    MemoryManager& memory_manager;
+
+    /// Writes one data word at the current write offset and advances the offset.
+    void ProcessData(u32 data);
+};
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(line_length_in, 0x60);
+ASSERT_REG_POSITION(line_count, 0x61);
+ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(exec, 0x6C);
+ASSERT_REG_POSITION(data, 0x6D);
+#undef ASSERT_REG_POSITION
+
+// The named fields and reg_array share storage, so they must cover the same number of words.
+static_assert(sizeof(KeplerMemory::Regs) == KeplerMemory::Regs::NUM_REGS * sizeof(u32),
+              "KeplerMemory Regs has wrong size");
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 329079ddd..8afd26fe9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -248,8 +248,8 @@ void Maxwell3D::DrawArrays() {
 
 void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
-    auto& shader = state.shader_stages[static_cast<size_t>(stage)];
-    auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)];
+    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+    auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)];
 
     auto& buffer = shader.const_buffers[bind_data.index];
 
@@ -316,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
 std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
     std::vector<Texture::FullTextureInfo> textures;
 
-    auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)];
+    auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
     auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
 
     GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
 
     // Offset into the texture constbuffer where the texture info begins.
-    static constexpr size_t TextureInfoOffset = 0x20;
+    static constexpr std::size_t TextureInfoOffset = 0x20;
 
     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
@@ -360,8 +360,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     return textures;
 }
 
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
-    auto& shader = state.shader_stages[static_cast<size_t>(stage)];
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
+                                                    std::size_t offset) const {
+    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
     auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
 
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d3be900a4..b81b0723d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -34,17 +34,17 @@ public:
     /// Register structure of the Maxwell3D engine.
     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
     struct Regs {
-        static constexpr size_t NUM_REGS = 0xE00;
-
-        static constexpr size_t NumRenderTargets = 8;
-        static constexpr size_t NumViewports = 16;
-        static constexpr size_t NumCBData = 16;
-        static constexpr size_t NumVertexArrays = 32;
-        static constexpr size_t NumVertexAttributes = 32;
-        static constexpr size_t MaxShaderProgram = 6;
-        static constexpr size_t MaxShaderStage = 5;
+        static constexpr std::size_t NUM_REGS = 0xE00;
+
+        static constexpr std::size_t NumRenderTargets = 8;
+        static constexpr std::size_t NumViewports = 16;
+        static constexpr std::size_t NumCBData = 16;
+        static constexpr std::size_t NumVertexArrays = 32;
+        static constexpr std::size_t NumVertexAttributes = 32;
+        static constexpr std::size_t MaxShaderProgram = 6;
+        static constexpr std::size_t MaxShaderStage = 5;
         // Maximum number of const buffers per shader stage.
-        static constexpr size_t MaxConstBuffers = 18;
+        static constexpr std::size_t MaxConstBuffers = 18;
 
         enum class QueryMode : u32 {
             Write = 0,
@@ -443,9 +443,9 @@ public:
             }
         };
 
-        bool IsShaderConfigEnabled(size_t index) const {
+        bool IsShaderConfigEnabled(std::size_t index) const {
             // The VertexB is always enabled.
-            if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) {
+            if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
                 return true;
             }
             return shader_config[index].enable != 0;
@@ -571,7 +571,7 @@ public:
                         BitField<25, 3, u32> map_7;
                     };
 
-                    u32 GetMap(size_t index) const {
+                    u32 GetMap(std::size_t index) const {
                         const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
                                                                      map_4, map_5, map_6, map_7};
                         ASSERT(index < maps.size());
@@ -925,7 +925,7 @@ public:
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 
     /// Returns the texture information for a specific texture in a specific shader stage.
-    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
+    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
 
 private:
     VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c24d33d5c..aa7481b8c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -50,7 +50,7 @@ void MaxwellDMA::HandleCopy() {
     ASSERT(regs.dst_params.pos_y == 0);
 
     if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
-        size_t copy_size = regs.x_count;
+        std::size_t copy_size = regs.x_count;
 
         // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
         // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 7882f16e0..311ccb616 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -23,7 +23,7 @@ public:
     void WriteReg(u32 method, u32 value);
 
     struct Regs {
-        static constexpr size_t NUM_REGS = 0x1D6;
+        static constexpr std::size_t NUM_REGS = 0x1D6;
 
         struct Parameters {
             union {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 9176a8dbc..6e555ea03 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -20,10 +20,10 @@ namespace Tegra::Shader {
 
 struct Register {
     /// Number of registers
-    static constexpr size_t NumRegisters = 256;
+    static constexpr std::size_t NumRegisters = 256;
 
     /// Register 255 is special cased to always be 0
-    static constexpr size_t ZeroIndex = 255;
+    static constexpr std::size_t ZeroIndex = 255;
 
     enum class Size : u64 {
         Byte = 0,
@@ -67,6 +67,13 @@ private:
     u64 value{};
 };
 
+enum class AttributeSize : u64 { // Value type of the 3-bit Attribute::fmt20.size field.
+    Word = 0,
+    DoubleWord = 1,
+    TripleWord = 2,
+    QuadWord = 3,
+};
+
 union Attribute {
     Attribute() = default;
 
@@ -87,9 +94,10 @@ union Attribute {
     };
 
     union {
+        BitField<20, 10, u64> immediate;
         BitField<22, 2, u64> element;
         BitField<24, 6, Index> index;
-        BitField<47, 3, u64> size;
+        BitField<47, 3, AttributeSize> size;
     } fmt20;
 
     union {
@@ -254,6 +262,15 @@ enum class TextureQueryType : u64 {
     BorderColor = 22,
 };
 
+enum class TextureProcessMode : u64 { // Value type of the 3-bit tex.process_mode field.
+    None = 0,
+    LZ = 1,  // Unknown, appears to be the same as None.
+    LB = 2,  // Load Bias.
+    LL = 3,  // Load LOD (Level Of Detail).
+    LBA = 6, // Load Bias. The A is unknown, does not appear to differ from LB.
+    LLA = 7  // Load LOD. The A is unknown, does not appear to differ from LL.
+};
+
 enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
 enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
 
@@ -424,6 +441,45 @@ union Instruction {
     } bfe;
 
     union {
+        BitField<48, 3, u64> pred48;
+
+        union {
+            BitField<20, 20, u64> entry_a; // Covers bits 20-39.
+            BitField<39, 5, u64> entry_b; // NOTE(review): overlaps entry_a at bit 39 - confirm.
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } imm;
+
+        union {
+            BitField<20, 14, u64> cb_index;
+            BitField<34, 5, u64> cb_offset;
+            BitField<56, 1, u64> neg;
+            BitField<57, 1, u64> uses_cc;
+        } hi;
+
+        union {
+            BitField<20, 14, u64> cb_index;
+            BitField<34, 5, u64> cb_offset;
+            BitField<39, 5, u64> entry_a;
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } rz;
+
+        union {
+            BitField<39, 5, u64> entry_a;
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } r1;
+
+        union {
+            BitField<28, 8, u64> entry_a;
+            BitField<37, 1, u64> neg;
+            BitField<38, 1, u64> uses_cc;
+        } r2;
+
+    } lea; // Presumably one layout per Id::LEA_IMM/HI/RZ/R1/R2 variant - TODO confirm.
+
+    union {
         BitField<0, 5, FlowCondition> cond;
     } flow;
 
@@ -478,6 +534,18 @@ union Instruction {
     } psetp;
 
     union {
+        BitField<12, 3, u64> pred12;
+        BitField<15, 1, u64> neg_pred12; // Presumably negates pred12 - TODO confirm.
+        BitField<24, 2, PredOperation> cond;
+        BitField<29, 3, u64> pred29;
+        BitField<32, 1, u64> neg_pred29; // Presumably negates pred29 - TODO confirm.
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred39; // Presumably negates pred39 - TODO confirm.
+        BitField<44, 1, u64> bf; // NOTE(review): meaning not visible in this patch - confirm.
+        BitField<45, 2, PredOperation> op;
+    } pset; // Presumably the operand layout decoded for Id::PSET - TODO confirm.
+
+    union {
         BitField<39, 3, u64> pred39;
         BitField<42, 1, u64> neg_pred;
         BitField<43, 1, u64> neg_a;
@@ -522,8 +590,9 @@ union Instruction {
         BitField<28, 1, u64> array;
         BitField<29, 2, TextureType> texture_type;
         BitField<31, 4, u64> component_mask;
+        BitField<55, 3, TextureProcessMode> process_mode;
 
-        bool IsComponentEnabled(size_t component) const {
+        bool IsComponentEnabled(std::size_t component) const {
             return ((1ull << component) & component_mask) != 0;
         }
     } tex;
@@ -538,7 +607,7 @@ union Instruction {
         BitField<29, 2, TextureType> texture_type;
         BitField<31, 4, u64> component_mask;
 
-        bool IsComponentEnabled(size_t component) const {
+        bool IsComponentEnabled(std::size_t component) const {
             return ((1ull << component) & component_mask) != 0;
         }
     } tmml;
@@ -585,7 +654,7 @@ union Instruction {
             return gpr28.Value() != Register::ZeroIndex;
         }
 
-        bool IsComponentEnabled(size_t component) const {
+        bool IsComponentEnabled(std::size_t component) const {
             static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
                 {},
                 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@@ -593,7 +662,7 @@ union Instruction {
                 {0x7, 0xb, 0xd, 0xe, 0xf},
             }};
 
-            size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
+            std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
             index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
 
             u32 mask = mask_lut[index][component_mask_selector];
@@ -726,6 +795,11 @@ public:
         ISCADD_C, // Scale and Add
         ISCADD_R,
         ISCADD_IMM,
+        LEA_R1,
+        LEA_R2,
+        LEA_RZ,
+        LEA_IMM,
+        LEA_HI,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -784,6 +858,7 @@ public:
         ISET_C,
         ISET_IMM,
         PSETP,
+        PSET,
         XMAD_IMM,
         XMAD_CR,
         XMAD_RC,
@@ -807,6 +882,7 @@ public:
         IntegerSet,
         IntegerSetPredicate,
         PredicateSetPredicate,
+        PredicateSetRegister,
         Conversion,
         Xmad,
         Unknown,
@@ -871,7 +947,7 @@ public:
 private:
     struct Detail {
     private:
-        static constexpr size_t opcode_bitsize = 16;
+        static constexpr std::size_t opcode_bitsize = 16;
 
         /**
          * Generates the mask and the expected value after masking from a given bitstring.
@@ -880,8 +956,8 @@ private:
          */
         static auto GetMaskAndExpect(const char* const bitstring) {
             u16 mask = 0, expect = 0;
-            for (size_t i = 0; i < opcode_bitsize; i++) {
-                const size_t bit_position = opcode_bitsize - i - 1;
+            for (std::size_t i = 0; i < opcode_bitsize; i++) {
+                const std::size_t bit_position = opcode_bitsize - i - 1;
                 switch (bitstring[i]) {
                 case '0':
                     mask |= 1 << bit_position;
@@ -958,6 +1034,11 @@ private:
             INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
             INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
             INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
+            INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
+            INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
+            INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
+            INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
+            INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -1012,6 +1093,7 @@ private:
             INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
             INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
             INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
+            INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
             INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
             INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
             INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 86a809f86..baa8b63b7 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -4,6 +4,7 @@
 
 #include "common/assert.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_compute.h"
 #include "video_core/engines/maxwell_dma.h"
@@ -27,6 +28,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
     fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
     maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(*memory_manager);
 }
 
 GPU::~GPU() = default;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 589a59b4f..5cc1e19ca 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 {
     R32_UINT = 0xE4,
     R32_FLOAT = 0xE5,
     B5G6R5_UNORM = 0xE8,
+    BGR5A1_UNORM = 0xE9,
     RG8_UNORM = 0xEA,
     RG8_SNORM = 0xEB,
     R16_UNORM = 0xEE,
@@ -102,6 +103,7 @@ class Fermi2D;
 class Maxwell3D;
 class MaxwellCompute;
 class MaxwellDMA;
+class KeplerMemory;
 } // namespace Engines
 
 enum class EngineID {
@@ -146,6 +148,8 @@ private:
     std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
     /// DMA engine
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+    /// Inline memory engine
+    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 7d836b816..cee0baaf3 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -152,7 +152,7 @@ private:
     boost::optional<u32>
         delayed_pc; ///< Program counter to execute at after the delay slot is executed.
 
-    static constexpr size_t NumMacroRegisters = 8;
+    static constexpr std::size_t NumMacroRegisters = 8;
 
     /// General purpose macro registers.
     std::array<u32, NumMacroRegisters> registers = {};
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index be17a2b9c..0df3725c2 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -19,6 +19,7 @@ void RendererBase::RefreshBaseSettings() {
     UpdateCurrentFramebufferLayout();
 
     renderer_settings.use_framelimiter = Settings::values.use_frame_limit;
+    renderer_settings.set_background_color = true;
 }
 
 void RendererBase::UpdateCurrentFramebufferLayout() {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 2a357f9d0..2cd0738ff 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -19,6 +19,7 @@ namespace VideoCore {
 
 struct RendererSettings {
     std::atomic_bool use_framelimiter{false};
+    std::atomic_bool set_background_color{false};
 };
 
 class RendererBase : NonCopyable {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0b5d18bcb..578aca789 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -12,10 +12,10 @@
 
 namespace OpenGL {
 
-OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
+OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
 
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment,
-                                      bool cache) {
+GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
+                                      std::size_t alignment, bool cache) {
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
     const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
 
@@ -53,7 +53,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, siz
     return uploaded_offset;
 }
 
-GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) {
+GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size,
+                                          std::size_t alignment) {
     AlignBuffer(alignment);
     std::memcpy(buffer_ptr, raw_pointer, size);
     GLintptr uploaded_offset = buffer_offset;
@@ -63,7 +64,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size,
     return uploaded_offset;
 }
 
-void OGLBufferCache::Map(size_t max_size) {
+void OGLBufferCache::Map(std::size_t max_size) {
     bool invalidate;
     std::tie(buffer_ptr, buffer_offset_base, invalidate) =
         stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@@ -81,10 +82,10 @@ GLuint OGLBufferCache::GetHandle() const {
     return stream_buffer.GetHandle();
 }
 
-void OGLBufferCache::AlignBuffer(size_t alignment) {
+void OGLBufferCache::AlignBuffer(std::size_t alignment) {
     // Align the offset, not the mapped pointer
     GLintptr offset_aligned =
-        static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
+        static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
     buffer_ptr += offset_aligned - buffer_offset;
     buffer_offset = offset_aligned;
 }
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 6da862902..6c18461f4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -19,32 +19,32 @@ struct CachedBufferEntry final {
         return addr;
     }
 
-    size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const {
         return size;
     }
 
     VAddr addr;
-    size_t size;
+    std::size_t size;
     GLintptr offset;
-    size_t alignment;
+    std::size_t alignment;
 };
 
 class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
 public:
-    explicit OGLBufferCache(size_t size);
+    explicit OGLBufferCache(std::size_t size);
 
-    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4,
+    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                           bool cache = true);
 
-    GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4);
+    GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
 
-    void Map(size_t max_size);
+    void Map(std::size_t max_size);
     void Unmap();
 
     GLuint GetHandle() const;
 
 protected:
-    void AlignBuffer(size_t alignment);
+    void AlignBuffer(std::size_t alignment);
 
 private:
     OGLStreamBuffer stream_buffer;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c59f3af1b..274c2dbcf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <array>
 #include <memory>
 #include <string>
 #include <string_view>
@@ -45,7 +46,7 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100,
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
     : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
     // Create sampler objects
-    for (size_t i = 0; i < texture_samplers.size(); ++i) {
+    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
         state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
     }
@@ -58,6 +59,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
 
         if (extension == "GL_ARB_direct_state_access") {
             has_ARB_direct_state_access = true;
+        } else if (extension == "GL_ARB_multi_bind") {
+            has_ARB_multi_bind = true;
         } else if (extension == "GL_ARB_separate_shader_objects") {
             has_ARB_separate_shader_objects = true;
         } else if (extension == "GL_ARB_vertex_attrib_binding") {
@@ -178,7 +181,7 @@ void RasterizerOpenGL::SetupShaders() {
     u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
     u32 current_texture_bindpoint = 0;
 
-    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
 
@@ -187,12 +190,12 @@ void RasterizerOpenGL::SetupShaders() {
             continue;
         }
 
-        const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
+        const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
 
         GLShader::MaxwellUniformData ubo{};
         ubo.SetFromRegs(gpu.state.shader_stages[stage]);
         const GLintptr offset = buffer_cache.UploadHostMemory(
-            &ubo, sizeof(ubo), static_cast<size_t>(uniform_buffer_alignment));
+            &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
 
         // Bind the buffer
         glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo));
@@ -235,10 +238,10 @@ void RasterizerOpenGL::SetupShaders() {
     shader_program_manager->UseTrivialGeometryShader();
 }
 
-size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
+std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
-    size_t size = 0;
+    std::size_t size = 0;
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
         if (!regs.vertex_array[index].IsEnabled())
             continue;
@@ -296,7 +299,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
 
 void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
                                              bool preserve_contents,
-                                             boost::optional<size_t> single_color_target) {
+                                             boost::optional<std::size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
@@ -327,7 +330,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
         } else {
             // Multiple color attachments are enabled
             std::array<GLenum, Maxwell::NumRenderTargets> buffers;
-            for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+            for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
                 Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
                 buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
                 glFramebufferTexture2D(
@@ -339,7 +342,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
         }
     } else {
         // No color attachments are enabled - zero out all of them
-        for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+        for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
                                    GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
                                    0, 0);
@@ -459,15 +462,15 @@ void RasterizerOpenGL::DrawArrays() {
     state.draw.vertex_buffer = buffer_cache.GetHandle();
     state.Apply();
 
-    size_t buffer_size = CalculateVertexArraysSize();
+    std::size_t buffer_size = CalculateVertexArraysSize();
 
     if (is_indexed) {
-        buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;
+        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
     }
 
     // Uniform space for the 5 shader stages
     buffer_size =
-        Common::AlignUp<size_t>(buffer_size, 4) +
+        Common::AlignUp<std::size_t>(buffer_size, 4) +
         (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
 
     // Add space for at least 18 constant buffers
@@ -641,19 +644,30 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
     MICROPROFILE_SCOPE(OpenGL_UBO);
     const auto& gpu = Core::System::GetInstance().GPU();
     const auto& maxwell3d = gpu.Maxwell3D();
-    const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
+    const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
     const auto& entries = shader->GetShaderEntries().const_buffer_entries;
 
+    constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
+    std::array<GLuint, max_binds> bind_buffers;
+    std::array<GLintptr, max_binds> bind_offsets;
+    std::array<GLsizeiptr, max_binds> bind_sizes;
+
+    ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
+
     // Upload only the enabled buffers from the 16 constbuffers of each shader stage
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& used_buffer = entries[bindpoint];
         const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
 
         if (!buffer.enabled) {
+            // For disabled buffers, set the values to zero to unbind them
+            bind_buffers[bindpoint] = 0;
+            bind_offsets[bindpoint] = 0;
+            bind_sizes[bindpoint] = 0;
             continue;
         }
 
-        size_t size = 0;
+        std::size_t size = 0;
 
         if (used_buffer.IsIndirect()) {
             // Buffer is accessed indirectly, so upload the entire thing
@@ -675,17 +689,22 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
         ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
 
         GLintptr const_buffer_offset = buffer_cache.UploadMemory(
-            buffer.address, size, static_cast<size_t>(uniform_buffer_alignment));
-
-        glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint,
-                          buffer_cache.GetHandle(), const_buffer_offset, size);
+            buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
 
         // Now configure the bindpoint of the buffer inside the shader
         glUniformBlockBinding(shader->GetProgramHandle(),
                               shader->GetProgramResourceIndex(used_buffer),
                               current_bindpoint + bindpoint);
+
+        // Prepare values for multibind
+        bind_buffers[bindpoint] = buffer_cache.GetHandle();
+        bind_offsets[bindpoint] = const_buffer_offset;
+        bind_sizes[bindpoint] = size;
     }
 
+    glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
+                       bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
+
     return current_bindpoint + static_cast<u32>(entries.size());
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 745c3dc0c..bf9560bdc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -73,7 +73,7 @@ public:
     };
 
     /// Maximum supported size that a constbuffer can have in bytes.
-    static constexpr size_t MaxConstbufferSize = 0x10000;
+    static constexpr std::size_t MaxConstbufferSize = 0x10000;
     static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
                   "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
 
@@ -106,7 +106,7 @@ private:
      */
     void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
                                bool preserve_contents = true,
-                               boost::optional<size_t> single_color_target = {});
+                               boost::optional<std::size_t> single_color_target = {});
 
     /*
      * Configures the current constbuffers to use for the draw command.
@@ -159,6 +159,7 @@ private:
     void SyncLogicOpState();
 
     bool has_ARB_direct_state_access = false;
+    bool has_ARB_multi_bind = false;
     bool has_ARB_separate_shader_objects = false;
     bool has_ARB_vertex_attrib_binding = false;
 
@@ -179,12 +180,12 @@ private:
 
     std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
 
-    static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+    static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
     OGLBufferCache buffer_cache;
     OGLFramebuffer framebuffer;
     GLint uniform_buffer_alignment;
 
-    size_t CalculateVertexArraysSize() const;
+    std::size_t CalculateVertexArraysSize() const;
 
     void SetupVertexArrays();
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 29d61eccd..86682d7cb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -53,8 +53,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
     params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
     params.unaligned_height = config.tic.Height();
-    params.cache_width = Common::AlignUp(params.width, 8);
-    params.cache_height = Common::AlignUp(params.height, 8);
     params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
 
     switch (params.target) {
@@ -77,7 +75,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     return params;
 }
 
-/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) {
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
     const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
     SurfaceParams params{};
     params.addr = TryGetCpuAddr(config.Address());
@@ -89,8 +87,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.width = config.width;
     params.height = config.height;
     params.unaligned_height = config.height;
-    params.cache_width = Common::AlignUp(params.width, 8);
-    params.cache_height = Common::AlignUp(params.height, 8);
     params.target = SurfaceTarget::Texture2D;
     params.depth = 1;
     params.size_in_bytes = params.SizeInBytes();
@@ -110,8 +106,6 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.width = zeta_width;
     params.height = zeta_height;
     params.unaligned_height = zeta_height;
-    params.cache_width = Common::AlignUp(params.width, 8);
-    params.cache_height = Common::AlignUp(params.height, 8);
     params.target = SurfaceTarget::Texture2D;
     params.depth = 1;
     params.size_in_bytes = params.SizeInBytes();
@@ -122,7 +116,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
     {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false},                     // ABGR8S
     {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false},   // ABGR8UI
-    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false},    // B5G6R5U
+    {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false},   // B5G6R5U
     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
      false}, // A2B10G10R10U
     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
@@ -173,6 +167,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false},                                // RG8S
     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // RG32UI
     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // R32UI
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
 
     // Depth formats
     {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -209,7 +204,7 @@ static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
 }
 
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
-    ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
+    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
     auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
     ASSERT(component_type == format.component_type);
 
@@ -219,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
 static bool IsPixelFormatASTC(PixelFormat format) {
     switch (format) {
     case PixelFormat::ASTC_2D_4X4:
+    case PixelFormat::ASTC_2D_8X8:
         return true;
     default:
         return false;
@@ -229,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
     switch (format) {
     case PixelFormat::ASTC_2D_4X4:
         return {4, 4};
+    case PixelFormat::ASTC_2D_8X8:
+        return {8, 8};
     default:
         LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
         UNREACHABLE();
@@ -262,7 +260,7 @@ static bool IsFormatBCn(PixelFormat format) {
 }
 
 template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size,
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size,
                 VAddr addr) {
     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
@@ -273,7 +271,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t
         const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
         const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
             addr, tile_size, bytes_per_pixel, stride, height, block_height);
-        const size_t size_to_copy{std::min(gl_buffer_size, data.size())};
+        const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
         memcpy(gl_buffer, data.data(), size_to_copy);
     } else {
         // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
@@ -284,7 +282,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t
     }
 }
 
-static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
                             SurfaceParams::MaxPixelFormat>
     morton_to_gl_fns = {
         // clang-format off
@@ -333,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
         MortonCopy<true, PixelFormat::RG8S>,
         MortonCopy<true, PixelFormat::RG32UI>,
         MortonCopy<true, PixelFormat::R32UI>,
+        MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
         MortonCopy<true, PixelFormat::Z32F>,
         MortonCopy<true, PixelFormat::Z16>,
         MortonCopy<true, PixelFormat::Z24S8>,
@@ -341,7 +340,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
         // clang-format on
 };
 
-static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
                             SurfaceParams::MaxPixelFormat>
     gl_to_morton_fns = {
         // clang-format off
@@ -392,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
         MortonCopy<false, PixelFormat::RG8S>,
         MortonCopy<false, PixelFormat::RG32UI>,
         MortonCopy<false, PixelFormat::R32UI>,
+        nullptr,
         MortonCopy<false, PixelFormat::Z32F>,
         MortonCopy<false, PixelFormat::Z16>,
         MortonCopy<false, PixelFormat::Z24S8>,
@@ -477,30 +477,27 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
         // Only pre-create the texture for non-compressed textures.
         switch (params.target) {
         case SurfaceParams::SurfaceTarget::Texture1D:
-            glTexImage1D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
-                         rect.GetWidth(), 0, format_tuple.format, format_tuple.type, nullptr);
+            glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+                           rect.GetWidth());
             break;
         case SurfaceParams::SurfaceTarget::Texture2D:
-            glTexImage2D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
-                         rect.GetWidth(), rect.GetHeight(), 0, format_tuple.format,
-                         format_tuple.type, nullptr);
+            glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+                           rect.GetWidth(), rect.GetHeight());
             break;
         case SurfaceParams::SurfaceTarget::Texture3D:
         case SurfaceParams::SurfaceTarget::Texture2DArray:
-            glTexImage3D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
-                         rect.GetWidth(), rect.GetHeight(), params.depth, 0, format_tuple.format,
-                         format_tuple.type, nullptr);
+            glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+                           rect.GetWidth(), rect.GetHeight(), params.depth);
             break;
         default:
             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                          static_cast<u32>(params.target));
             UNREACHABLE();
-            glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, rect.GetWidth(),
-                         rect.GetHeight(), 0, format_tuple.format, format_tuple.type, nullptr);
+            glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
+                           rect.GetHeight());
         }
     }
 
-    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL, 0);
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
@@ -522,9 +519,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
     S8Z24 input_pixel{};
     Z24S8 output_pixel{};
     constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
-    for (size_t y = 0; y < height; ++y) {
-        for (size_t x = 0; x < width; ++x) {
-            const size_t offset{bpp * (y * width + x)};
+    for (std::size_t y = 0; y < height; ++y) {
+        for (std::size_t x = 0; x < width; ++x) {
+            const std::size_t offset{bpp * (y * width + x)};
             std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
             output_pixel.s8.Assign(input_pixel.s8);
             output_pixel.z24.Assign(input_pixel.z24);
@@ -535,9 +532,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 
 static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
     constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
-    for (size_t y = 0; y < height; ++y) {
-        for (size_t x = 0; x < width; ++x) {
-            const size_t offset{bpp * (y * width + x)};
+    for (std::size_t y = 0; y < height; ++y) {
+        for (std::size_t x = 0; x < width; ++x) {
+            const std::size_t offset{bpp * (y * width + x)};
             const u8 temp{data[offset]};
             data[offset] = data[offset + 1];
             data[offset + 1] = temp;
@@ -553,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
 static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
                                                u32 width, u32 height) {
     switch (pixel_format) {
-    case PixelFormat::ASTC_2D_4X4: {
+    case PixelFormat::ASTC_2D_4X4:
+    case PixelFormat::ASTC_2D_8X8: {
         // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
         u32 block_width{};
         u32 block_height{};
@@ -600,13 +598,13 @@ void CachedSurface::LoadGLBuffer() {
             UNREACHABLE();
         }
 
-        gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size);
-        morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
+        gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);
+        morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
             params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
             params.addr);
     } else {
         const u8* const texture_src_data_end{texture_src_data +
-                                             (static_cast<size_t>(params.depth) * copy_size)};
+                                             (static_cast<std::size_t>(params.depth) * copy_size)};
         gl_buffer.assign(texture_src_data, texture_src_data_end);
     }
 
@@ -625,7 +623,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
 
     MICROPROFILE_SCOPE(OpenGL_TextureUL);
 
-    ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height *
+    ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height *
                                    GetGLBytesPerPixel(params.pixel_format) * params.depth);
 
     const auto& rect{params.GetRect()};
@@ -633,8 +631,9 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
     // Load data from memory to the surface
     const GLint x0 = static_cast<GLint>(rect.left);
     const GLint y0 = static_cast<GLint>(rect.bottom);
-    const size_t buffer_offset =
-        static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) *
+    const std::size_t buffer_offset =
+        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
+                                 static_cast<std::size_t>(x0)) *
         GetGLBytesPerPixel(params.pixel_format);
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -736,7 +735,7 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
     return GetSurface(depth_params, preserve_contents);
 }
 
-Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) {
+Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
     const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
 
     ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
@@ -817,20 +816,24 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
     // Get a new surface with the new parameters, and blit the previous surface to it
     Surface new_surface{GetUncachedSurface(new_params)};
 
-    // If format is unchanged, we can do a faster blit without reinterpreting pixel data
-    if (params.pixel_format == new_params.pixel_format) {
+    if (params.pixel_format == new_params.pixel_format ||
+        !Settings::values.use_accurate_framebuffers) {
+        // If the format is unchanged, or accurate framebuffers are disabled, just do a framebuffer
+        // blit. This is significantly faster than using PBOs, but is also likely less accurate, as
+        // textures will be converted rather than reinterpreted.
+
         BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
                      params.GetRect(), params.type, read_framebuffer.handle,
                      draw_framebuffer.handle);
-        return new_surface;
-    }
+    } else {
+        // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
+        // where pixels are reinterpreted as a new format (without conversion). This code path uses
+        // OpenGL PBOs and is quite slow.
 
-    // When using accurate framebuffers, always copy old data to new surface, regardless of format
-    if (Settings::values.use_accurate_framebuffers) {
         auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
         auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
 
-        size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
+        std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
 
         glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
         glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
@@ -854,7 +857,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
                 LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
                                   "reinterpretation but the texture is tiled.");
             }
-            size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
+            std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
             std::vector<u8> data(remaining_size);
             Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
             glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index e660998d0..d7a4bc37f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -70,19 +70,20 @@ struct SurfaceParams {
         RG8S = 42,
         RG32UI = 43,
         R32UI = 44,
+        ASTC_2D_8X8 = 45,
 
         MaxColorFormat,
 
         // Depth formats
-        Z32F = 45,
-        Z16 = 46,
+        Z32F = 46,
+        Z16 = 47,
 
         MaxDepthFormat,
 
         // DepthStencil formats
-        Z24S8 = 47,
-        S8Z24 = 48,
-        Z32FS8 = 49,
+        Z24S8 = 48,
+        S8Z24 = 49,
+        Z32FS8 = 50,
 
         MaxDepthStencilFormat,
 
@@ -90,7 +91,7 @@ struct SurfaceParams {
         Invalid = 255,
     };
 
-    static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max);
+    static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
 
     enum class ComponentType {
         Invalid = 0,
@@ -192,6 +193,7 @@ struct SurfaceParams {
             1, // RG8S
             1, // RG32UI
             1, // R32UI
+            4, // ASTC_2D_8X8
             1, // Z32F
             1, // Z16
             1, // Z24S8
@@ -199,8 +201,8 @@ struct SurfaceParams {
             1, // Z32FS8
         }};
 
-        ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
-        return compression_factor_table[static_cast<size_t>(format)];
+        ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
+        return compression_factor_table[static_cast<std::size_t>(format)];
     }
 
     static constexpr u32 GetFormatBpp(PixelFormat format) {
@@ -253,6 +255,7 @@ struct SurfaceParams {
             16,  // RG8S
             64,  // RG32UI
             32,  // R32UI
+            16,  // ASTC_2D_8X8
             32,  // Z32F
             16,  // Z16
             32,  // Z24S8
@@ -260,8 +263,8 @@ struct SurfaceParams {
             64,  // Z32FS8
         }};
 
-        ASSERT(static_cast<size_t>(format) < bpp_table.size());
-        return bpp_table[static_cast<size_t>(format)];
+        ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
+        return bpp_table[static_cast<std::size_t>(format)];
     }
 
     u32 GetFormatBpp() const {
@@ -316,6 +319,8 @@ struct SurfaceParams {
             return PixelFormat::R11FG11FB10F;
         case Tegra::RenderTargetFormat::B5G6R5_UNORM:
             return PixelFormat::B5G6R5U;
+        case Tegra::RenderTargetFormat::BGR5A1_UNORM:
+            return PixelFormat::A1B5G5R5U;
         case Tegra::RenderTargetFormat::RGBA32_UINT:
             return PixelFormat::RGBA32UI;
         case Tegra::RenderTargetFormat::R8_UNORM:
@@ -522,6 +527,8 @@ struct SurfaceParams {
             return PixelFormat::BC6H_SF16;
         case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
             return PixelFormat::ASTC_2D_4X4;
+        case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
+            return PixelFormat::ASTC_2D_8X8;
         case Tegra::Texture::TextureFormat::R16_G16:
             switch (component_type) {
             case Tegra::Texture::ComponentType::FLOAT:
@@ -576,6 +583,7 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RG16_UNORM:
         case Tegra::RenderTargetFormat::R16_UNORM:
         case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+        case Tegra::RenderTargetFormat::BGR5A1_UNORM:
         case Tegra::RenderTargetFormat::RG8_UNORM:
         case Tegra::RenderTargetFormat::RGBA16_UNORM:
             return ComponentType::UNorm;
@@ -636,16 +644,18 @@ struct SurfaceParams {
     }
 
     static SurfaceType GetFormatType(PixelFormat pixel_format) {
-        if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) {
+        if (static_cast<std::size_t>(pixel_format) <
+            static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
             return SurfaceType::ColorTexture;
         }
 
-        if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) {
+        if (static_cast<std::size_t>(pixel_format) <
+            static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
             return SurfaceType::Depth;
         }
 
-        if (static_cast<size_t>(pixel_format) <
-            static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
+        if (static_cast<std::size_t>(pixel_format) <
+            static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
             return SurfaceType::DepthStencil;
         }
 
@@ -659,7 +669,7 @@ struct SurfaceParams {
     MathUtil::Rectangle<u32> GetRect() const;
 
     /// Returns the size of this surface in bytes, adjusted for compression
-    size_t SizeInBytes() const {
+    std::size_t SizeInBytes() const {
         const u32 compression_factor{GetCompressionFactor(pixel_format)};
         ASSERT(width % compression_factor == 0);
         ASSERT(height % compression_factor == 0);
@@ -671,7 +681,7 @@ struct SurfaceParams {
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
 
     /// Creates SurfaceParams from a framebuffer configuration
-    static SurfaceParams CreateForFramebuffer(size_t index);
+    static SurfaceParams CreateForFramebuffer(std::size_t index);
 
     /// Creates SurfaceParams for a depth buffer configuration
     static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -680,8 +690,8 @@ struct SurfaceParams {
 
     /// Checks if surfaces are compatible for caching
     bool IsCompatibleSurface(const SurfaceParams& other) const {
-        return std::tie(pixel_format, type, cache_width, cache_height) ==
-               std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height);
+        return std::tie(pixel_format, type, width, height) ==
+               std::tie(other.pixel_format, other.type, other.width, other.height);
     }
 
     VAddr addr;
@@ -694,12 +704,8 @@ struct SurfaceParams {
     u32 height;
     u32 depth;
     u32 unaligned_height;
-    size_t size_in_bytes;
+    std::size_t size_in_bytes;
     SurfaceTarget target;
-
-    // Parameters used for caching only
-    u32 cache_width;
-    u32 cache_height;
 };
 
 }; // namespace OpenGL
@@ -715,7 +721,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
 namespace std {
 template <>
 struct hash<SurfaceReserveKey> {
-    size_t operator()(const SurfaceReserveKey& k) const {
+    std::size_t operator()(const SurfaceReserveKey& k) const {
         return k.Hash();
     }
 };
@@ -731,7 +737,7 @@ public:
         return params.addr;
     }
 
-    size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const {
         return params.size_in_bytes;
     }
 
@@ -779,7 +785,7 @@ public:
     Surface GetDepthBufferSurface(bool preserve_contents);
 
     /// Get the color surface based on the framebuffer configuration and the specified render target
-    Surface GetColorBufferSurface(size_t index, bool preserve_contents);
+    Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
 
     /// Flushes the surface to Switch memory
     void FlushSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 61080f5cc..894fe6eae 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -14,7 +14,7 @@ namespace OpenGL {
 /// Gets the address for the specified shader stage program
 static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+    const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
     return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
                                                shader_config.offset);
 }
@@ -28,7 +28,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) {
 
 /// Helper function to set shader uniform block bindings for a single shader stage
 static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
-                                         Maxwell::ShaderStage binding, size_t expected_size) {
+                                         Maxwell::ShaderStage binding, std::size_t expected_size) {
     const GLuint ub_index = glGetUniformBlockIndex(shader, name);
     if (ub_index == GL_INVALID_INDEX) {
         return;
@@ -36,7 +36,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
 
     GLint ub_size = 0;
     glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
-    ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size,
+    ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
                "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
     glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6e6febcbc..9bafe43a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -28,7 +28,7 @@ public:
     }
 
     /// Gets the size of the shader in guest memory, required for cache management
-    size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const {
         return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 434faf9d4..a1638c12e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -190,7 +190,7 @@ public:
 
 private:
     void AppendIndentation() {
-        shader_source.append(static_cast<size_t>(scope) * 4, ' ');
+        shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
     }
 
     std::string shader_source;
@@ -209,7 +209,7 @@ public:
         UnsignedInteger,
     };
 
-    GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
+    GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
 
     /// Gets the GLSL type string for a register
     static std::string GetTypeString() {
@@ -227,12 +227,12 @@ public:
     }
 
     /// Returns the index of the register
-    size_t GetIndex() const {
+    std::size_t GetIndex() const {
         return index;
     }
 
 private:
-    const size_t index;
+    const std::size_t index;
     const std::string& suffix;
 };
 
@@ -469,7 +469,7 @@ public:
     /// necessary.
     std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
                               bool is_array) {
-        const size_t offset = static_cast<size_t>(sampler.index.Value());
+        const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());
 
         // If this sampler has already been used, return the existing mapping.
         const auto itr =
@@ -482,7 +482,7 @@ public:
         }
 
         // Otherwise create a new mapping for this sampler
-        const size_t next_index = used_samplers.size();
+        const std::size_t next_index = used_samplers.size();
         const SamplerEntry entry{stage, offset, next_index, type, is_array};
         used_samplers.emplace_back(entry);
         return entry.GetName();
@@ -532,7 +532,7 @@ private:
     void BuildRegisterList() {
         regs.reserve(Register::NumRegisters);
 
-        for (size_t index = 0; index < Register::NumRegisters; ++index) {
+        for (std::size_t index = 0; index < Register::NumRegisters; ++index) {
             regs.emplace_back(index, suffix);
         }
     }
@@ -846,7 +846,7 @@ private:
      */
     bool IsSchedInstruction(u32 offset) const {
         // sched instructions appear once every 4 instructions.
-        static constexpr size_t SchedPeriod = 4;
+        static constexpr std::size_t SchedPeriod = 4;
         u32 absolute_offset = offset - main_offset;
 
         return (absolute_offset % SchedPeriod) == 0;
@@ -914,7 +914,7 @@ private:
         std::string result;
         result += '(';
 
-        for (size_t i = 0; i < shift_amounts.size(); ++i) {
+        for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
             if (i)
                 result += '|';
             result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
@@ -940,7 +940,7 @@ private:
 
         ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented");
 
-        size_t written_components = 0;
+        std::size_t written_components = 0;
         for (u32 component = 0; component < 4; ++component) {
             if (!instr.texs.IsComponentEnabled(component)) {
                 continue;
@@ -1487,6 +1487,71 @@ private:
                                           1, 1);
                 break;
             }
+            case OpCode::Id::LEA_R2:
+            case OpCode::Id::LEA_R1:
+            case OpCode::Id::LEA_IMM:
+            case OpCode::Id::LEA_RZ:
+            case OpCode::Id::LEA_HI: {
+                std::string op_c;
+
+                switch (opcode->GetId()) {
+                case OpCode::Id::LEA_R2: {
+                    op_a = regs.GetRegisterAsInteger(instr.gpr20);
+                    op_b = regs.GetRegisterAsInteger(instr.gpr39);
+                    op_c = std::to_string(instr.lea.r2.entry_a);
+                    break;
+                }
+
+                case OpCode::Id::LEA_R1: {
+                    const bool neg = instr.lea.r1.neg != 0;
+                    op_a = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_a = "-(" + op_a + ')';
+                    op_b = regs.GetRegisterAsInteger(instr.gpr20);
+                    op_c = std::to_string(instr.lea.r1.entry_a);
+                    break;
+                }
+
+                case OpCode::Id::LEA_IMM: {
+                    const bool neg = instr.lea.imm.neg != 0;
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_b = "-(" + op_b + ')';
+                    op_a = std::to_string(instr.lea.imm.entry_a);
+                    op_c = std::to_string(instr.lea.imm.entry_b);
+                    break;
+                }
+
+                case OpCode::Id::LEA_RZ: {
+                    const bool neg = instr.lea.rz.neg != 0;
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_b = "-(" + op_b + ')';
+                    op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
+                                           GLSLRegister::Type::Integer);
+                    op_c = std::to_string(instr.lea.rz.entry_a);
+
+                    break;
+                }
+
+                case OpCode::Id::LEA_HI:
+                default: {
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    op_a = std::to_string(instr.lea.imm.entry_a);
+                    op_c = std::to_string(instr.lea.imm.entry_b);
+                    LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+                    UNREACHABLE();
+                }
+                }
+                if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) {
+                    LOG_ERROR(HW_GPU, "Unhandled LEA Predicate");
+                    UNREACHABLE();
+                }
+                const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
+
+                break;
+            }
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
                              opcode->GetName());
@@ -1687,13 +1752,34 @@ private:
         case OpCode::Type::Memory: {
             switch (opcode->GetId()) {
             case OpCode::Id::LD_A: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                 // Note: Shouldn't this be interp mode flat? As in no interpolation made.
+                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+                           "Indirect attribute loads are not supported");
+                ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+                           "Unaligned attribute loads are not supported");
 
                 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
                                                   Tegra::Shader::IpaSampleMode::Default};
-                regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
-                                                instr.attribute.fmt20.index, input_mode);
+
+                u32 next_element = instr.attribute.fmt20.element;
+                u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
+
+                const auto LoadNextElement = [&](u32 reg_offset) {
+                    regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
+                                                    static_cast<Attribute::Index>(next_index),
+                                                    input_mode);
+
+                    // Load the next attribute element into the following register. If the element
+                    // to load goes beyond the vec4 size, load the first element of the next
+                    // attribute.
+                    next_element = (next_element + 1) % 4;
+                    next_index = next_index + (next_element == 0 ? 1 : 0);
+                };
+
+                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+                    LoadNextElement(reg_offset);
+                }
                 break;
             }
             case OpCode::Id::LD_C: {
@@ -1735,9 +1821,31 @@ private:
                 break;
             }
             case OpCode::Id::ST_A: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
-                regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
-                                                  instr.attribute.fmt20.element, instr.gpr0);
+                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+                           "Indirect attribute loads are not supported");
+                ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+                           "Unaligned attribute loads are not supported");
+
+                u32 next_element = instr.attribute.fmt20.element;
+                u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
+
+                const auto StoreNextElement = [&](u32 reg_offset) {
+                    regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
+                                                      next_element,
+                                                      instr.gpr0.Value() + reg_offset);
+
+                    // Store the following register into the next attribute element. If the
+                    // element to store goes beyond the vec4 size, store to the first element of
+                    // the next attribute.
+                    next_element = (next_element + 1) % 4;
+                    next_index = next_index + (next_element == 0 ? 1 : 0);
+                };
+
+                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+                    StoreNextElement(reg_offset);
+                }
+
                 break;
             }
             case OpCode::Id::TEX: {
@@ -1768,17 +1876,49 @@ private:
                     coord = "vec2 coords = vec2(" + x + ", " + y + ");";
                     texture_type = Tegra::Shader::TextureType::Texture2D;
                 }
+                // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
+                // or lod.
+                const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
 
                 const std::string sampler = GetSampler(instr.sampler, texture_type, false);
                 // Add an extra scope and declare the texture coords inside to prevent
                 // overwriting them in case they are used as outputs of the texs instruction.
+
                 shader.AddLine("{");
                 ++shader.scope;
                 shader.AddLine(coord);
-                const std::string texture = "texture(" + sampler + ", coords)";
+                std::string texture;
 
-                size_t dest_elem{};
-                for (size_t elem = 0; elem < 4; ++elem) {
+                switch (instr.tex.process_mode) {
+                case Tegra::Shader::TextureProcessMode::None: {
+                    texture = "texture(" + sampler + ", coords)";
+                    break;
+                }
+                case Tegra::Shader::TextureProcessMode::LZ: {
+                    texture = "textureLod(" + sampler + ", coords, 0.0)";
+                    break;
+                }
+                case Tegra::Shader::TextureProcessMode::LB:
+                case Tegra::Shader::TextureProcessMode::LBA: {
+                    // TODO: Figure out whether the A suffix changes the equation at all.
+                    texture = "texture(" + sampler + ", coords, " + op_c + ')';
+                    break;
+                }
+                case Tegra::Shader::TextureProcessMode::LL:
+                case Tegra::Shader::TextureProcessMode::LLA: {
+                    // TODO: Figure out whether the A suffix changes the equation at all.
+                    texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
+                    break;
+                }
+                default: {
+                    texture = "texture(" + sampler + ", coords)";
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}",
+                                 static_cast<u32>(instr.tex.process_mode.Value()));
+                    UNREACHABLE();
+                }
+                }
+                std::size_t dest_elem{};
+                for (std::size_t elem = 0; elem < 4; ++elem) {
                     if (!instr.tex.IsComponentEnabled(elem)) {
                         // Skip disabled components
                         continue;
@@ -1882,8 +2022,8 @@ private:
                 const std::string texture = "textureGather(" + sampler + ", coords, " +
                                             std::to_string(instr.tld4.component) + ')';
 
-                size_t dest_elem{};
-                for (size_t elem = 0; elem < 4; ++elem) {
+                std::size_t dest_elem{};
+                for (std::size_t elem = 0; elem < 4; ++elem) {
                     if (!instr.tex.IsComponentEnabled(elem)) {
                         // Skip disabled components
                         continue;
@@ -2069,6 +2209,30 @@ private:
             }
             break;
         }
+        case OpCode::Type::PredicateSetRegister: {
+            const std::string op_a =
+                GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+            const std::string op_b =
+                GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+
+            const std::string second_pred =
+                GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+            const std::string combiner = GetPredicateCombiner(instr.pset.op);
+
+            const std::string predicate =
+                '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
+            const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
+            if (instr.pset.bf == 0) {
+                const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
+                regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1);
+            } else {
+                const std::string value = '(' + result + ") ? 1.0 : 0.0";
+                regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
+            }
+
+            break;
+        }
         case OpCode::Type::PredicateSetPredicate: {
             const std::string op_a =
                 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index a43e2997b..d53b93ad5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -13,7 +13,7 @@
 
 namespace OpenGL::GLShader {
 
-constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
+constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
 using ProgramCode = std::vector<u64>;
 
 class ConstBufferEntry {
@@ -51,7 +51,7 @@ public:
     }
 
     std::string GetName() const {
-        return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index);
+        return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
     }
 
     u32 GetHash() const {
@@ -74,15 +74,15 @@ class SamplerEntry {
     using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 public:
-    SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index,
+    SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
                  Tegra::Shader::TextureType type, bool is_array)
         : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {}
 
-    size_t GetOffset() const {
+    std::size_t GetOffset() const {
         return offset;
     }
 
-    size_t GetIndex() const {
+    std::size_t GetIndex() const {
         return sampler_index;
     }
 
@@ -91,7 +91,7 @@ public:
     }
 
     std::string GetName() const {
-        return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' +
+        return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
                std::to_string(sampler_index);
     }
 
@@ -133,7 +133,7 @@ public:
     }
 
     static std::string GetArrayName(Maxwell::ShaderStage stage) {
-        return TextureSamplerNames[static_cast<size_t>(stage)];
+        return TextureSamplerNames[static_cast<std::size_t>(stage)];
     }
 
 private:
@@ -143,9 +143,9 @@ private:
 
     /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
     /// instruction.
-    size_t offset;
+    std::size_t offset;
     Maxwell::ShaderStage stage;      ///< Shader stage where this sampler was used.
-    size_t sampler_index;            ///< Value used to index into the generated GLSL sampler array.
+    std::size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array.
     Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array; ///< Whether the texture is being sampled as an array texture or not.
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 533e42caa..b86cd96e8 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,7 +12,7 @@
 namespace OpenGL::GLShader {
 
 /// Number of OpenGL texture samplers that can be used in the fragment shader
-static constexpr size_t NumTextureSamplers = 32;
+static constexpr std::size_t NumTextureSamplers = 32;
 
 using Tegra::Engines::Maxwell3D;
 
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 6f70deb96..af99132ba 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -272,7 +272,7 @@ void OpenGLState::Apply() const {
     }
 
     // Clip distance
-    for (size_t i = 0; i < clip_distance.size(); ++i) {
+    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
         if (clip_distance[i] != cur_state.clip_distance[i]) {
             if (clip_distance[i]) {
                 glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index aadf68f16..664f3ca20 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -61,7 +61,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
     mapped_size = size;
 
     if (alignment > 0) {
-        buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
+        buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
     }
 
     bool invalidate = false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index ccff3e342..96d916b07 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -369,6 +369,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
  * Draws the emulated screens to the emulator window.
  */
 void RendererOpenGL::DrawScreen() {
+    if (renderer_settings.set_background_color) {
+        // Update background color before drawing
+        glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
+                     0.0f);
+    }
+
     const auto& layout = render_window.GetFramebufferLayout();
     const auto& screen = layout.screen;
 
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 272294c62..20ba6d4f6 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -46,6 +46,48 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
     }
 }
 
+template <std::size_t N, std::size_t M>
+struct alignas(64) SwizzleTable {
+    constexpr SwizzleTable() {
+        for (u32 y = 0; y < N; ++y) {
+            for (u32 x = 0; x < M; ++x) {
+                const u32 x2 = x * 16;
+                values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
+                                                ((x2 % 32) / 16) * 32 + (y % 2) * 16);
+            }
+        }
+    }
+    const std::array<u16, M>& operator[](std::size_t index) const {
+        return values[index];
+    }
+    std::array<std::array<u16, M>, N> values{};
+};
+
+constexpr auto swizzle_table = SwizzleTable<8, 4>();
+
+void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data,
+                     u8* unswizzled_data, bool unswizzle, u32 block_height) {
+    std::array<u8*, 2> data_ptrs;
+    const std::size_t stride{width * bytes_per_pixel};
+    const std::size_t image_width_in_gobs{(stride + 63) / 64};
+    const std::size_t copy_size{16};
+    for (std::size_t y = 0; y < height; ++y) {
+        const std::size_t initial_gob =
+            (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs +
+            (y % (8 * block_height) / 8) * 512;
+        const std::size_t pixel_base{y * width * bytes_per_pixel};
+        const auto& table = swizzle_table[y % 8];
+        for (std::size_t xb = 0; xb < stride; xb += copy_size) {
+            const std::size_t gob_address{initial_gob + (xb / 64) * 512 * block_height};
+            const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]};
+            const std::size_t pixel_index{xb + pixel_base};
+            data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
+            data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
+            std::memcpy(data_ptrs[0], data_ptrs[1], copy_size);
+        }
+    }
+}
+
 u32 BytesPerPixel(TextureFormat format) {
     switch (format) {
     case TextureFormat::DXT1:
@@ -63,6 +105,7 @@ u32 BytesPerPixel(TextureFormat format) {
     case TextureFormat::R32_G32_B32:
         return 12;
     case TextureFormat::ASTC_2D_4X4:
+    case TextureFormat::ASTC_2D_8X8:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::BF10GF11RF11:
@@ -91,8 +134,13 @@ u32 BytesPerPixel(TextureFormat format) {
 std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
                                  u32 height, u32 block_height) {
     std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
-    CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
-                     Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
+    if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
+        FastSwizzleData(width / tile_size, height / tile_size, bytes_per_pixel,
+                        Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
+    } else {
+        CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
+                         Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
+    }
     return unswizzled_data;
 }
 
@@ -111,6 +159,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::BC6H_UF16:
     case TextureFormat::BC6H_SF16:
     case TextureFormat::ASTC_2D_4X4:
+    case TextureFormat::ASTC_2D_8X8:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::A1B5G5R5:
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index c43e79e78..d229225b4 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -95,6 +95,8 @@ void Config::ReadValues() {
 
     qt_config->beginGroup("Audio");
     Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
+    Settings::values.enable_audio_stretching =
+        qt_config->value("enable_audio_stretching", true).toBool();
     Settings::values.audio_device_id =
         qt_config->value("output_device", "auto").toString().toStdString();
     Settings::values.volume = qt_config->value("volume", 1).toFloat();
@@ -230,6 +232,7 @@ void Config::SaveValues() {
 
     qt_config->beginGroup("Audio");
     qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
+    qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching);
     qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id));
     qt_config->setValue("volume", Settings::values.volume);
     qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_audio.cpp b/src/yuzu/configuration/configure_audio.cpp
index fbb813f6c..6ea59f2a3 100644
--- a/src/yuzu/configuration/configure_audio.cpp
+++ b/src/yuzu/configuration/configure_audio.cpp
@@ -46,6 +46,8 @@ void ConfigureAudio::setConfiguration() {
     }
     ui->output_sink_combo_box->setCurrentIndex(new_sink_index);
 
+    ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching);
+
     // The device list cannot be pre-populated (nor listed) until the output sink is known.
     updateAudioDevices(new_sink_index);
 
@@ -67,6 +69,7 @@ void ConfigureAudio::applyConfiguration() {
     Settings::values.sink_id =
         ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex())
             .toStdString();
+    Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked();
     Settings::values.audio_device_id =
         ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex())
             .toStdString();
diff --git a/src/yuzu/configuration/configure_audio.ui b/src/yuzu/configuration/configure_audio.ui
index ef67890dc..a29a0e265 100644
--- a/src/yuzu/configuration/configure_audio.ui
+++ b/src/yuzu/configuration/configure_audio.ui
@@ -31,6 +31,16 @@
         </item>
        </layout>
       </item>
+       <item>
+         <widget class="QCheckBox" name="toggle_audio_stretching">
+           <property name="toolTip">
+             <string>This post-processing effect adjusts audio speed to match emulation speed and helps prevent audio stutter. This however increases audio latency.</string>
+           </property>
+           <property name="text">
+             <string>Enable audio stretching</string>
+           </property>
+         </widget>
+       </item>
       <item>
        <layout class="QHBoxLayout">
         <item>
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp
index 1ae3423cf..8743ce982 100644
--- a/src/yuzu/configuration/configure_gamelist.cpp
+++ b/src/yuzu/configuration/configure_gamelist.cpp
@@ -2,47 +2,51 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/core.h"
+#include <array>
+#include <utility>
+
+#include "common/common_types.h"
 #include "core/settings.h"
 #include "ui_configure_gamelist.h"
-#include "ui_settings.h"
 #include "yuzu/configuration/configure_gamelist.h"
+#include "yuzu/ui_settings.h"
+
+namespace {
+constexpr std::array<std::pair<u32, const char*>, 5> default_icon_sizes{{
+    std::make_pair(0, QT_TR_NOOP("None")),
+    std::make_pair(32, QT_TR_NOOP("Small (32x32)")),
+    std::make_pair(64, QT_TR_NOOP("Standard (64x64)")),
+    std::make_pair(128, QT_TR_NOOP("Large (128x128)")),
+    std::make_pair(256, QT_TR_NOOP("Full Size (256x256)")),
+}};
+
+constexpr std::array<const char*, 4> row_text_names{{
+    QT_TR_NOOP("Filename"),
+    QT_TR_NOOP("Filetype"),
+    QT_TR_NOOP("Title ID"),
+    QT_TR_NOOP("Title Name"),
+}};
+} // Anonymous namespace
 
 ConfigureGameList::ConfigureGameList(QWidget* parent)
     : QWidget(parent), ui(new Ui::ConfigureGameList) {
     ui->setupUi(this);
 
-    static const std::vector<std::pair<u32, std::string>> default_icon_sizes{
-        std::make_pair(0, "None"),        std::make_pair(32, "Small"),
-        std::make_pair(64, "Standard"),   std::make_pair(128, "Large"),
-        std::make_pair(256, "Full Size"),
-    };
-
-    for (const auto& size : default_icon_sizes) {
-        ui->icon_size_combobox->addItem(QString::fromStdString(size.second + " (" +
-                                                               std::to_string(size.first) + "x" +
-                                                               std::to_string(size.first) + ")"),
-                                        size.first);
-    }
-
-    static const std::vector<std::string> row_text_names{
-        "Filename",
-        "Filetype",
-        "Title ID",
-        "Title Name",
-    };
-
-    for (size_t i = 0; i < row_text_names.size(); ++i) {
-        ui->row_1_text_combobox->addItem(QString::fromStdString(row_text_names[i]),
-                                         QVariant::fromValue(i));
-        ui->row_2_text_combobox->addItem(QString::fromStdString(row_text_names[i]),
-                                         QVariant::fromValue(i));
-    }
+    InitializeIconSizeComboBox();
+    InitializeRowComboBoxes();
 
     this->setConfiguration();
 }
 
-ConfigureGameList::~ConfigureGameList() {}
+ConfigureGameList::~ConfigureGameList() = default;
+
+void ConfigureGameList::applyConfiguration() {
+    UISettings::values.show_unknown = ui->show_unknown->isChecked();
+    UISettings::values.icon_size = ui->icon_size_combobox->currentData().toUInt();
+    UISettings::values.row_1_text_id = ui->row_1_text_combobox->currentData().toUInt();
+    UISettings::values.row_2_text_id = ui->row_2_text_combobox->currentData().toUInt();
+    Settings::Apply();
+}
 
 void ConfigureGameList::setConfiguration() {
     ui->show_unknown->setChecked(UISettings::values.show_unknown);
@@ -54,10 +58,39 @@ void ConfigureGameList::setConfiguration() {
         ui->row_2_text_combobox->findData(UISettings::values.row_2_text_id));
 }
 
-void ConfigureGameList::applyConfiguration() {
-    UISettings::values.show_unknown = ui->show_unknown->isChecked();
-    UISettings::values.icon_size = ui->icon_size_combobox->currentData().toUInt();
-    UISettings::values.row_1_text_id = ui->row_1_text_combobox->currentData().toUInt();
-    UISettings::values.row_2_text_id = ui->row_2_text_combobox->currentData().toUInt();
-    Settings::Apply();
+void ConfigureGameList::changeEvent(QEvent* event) {
+    if (event->type() == QEvent::LanguageChange) {
+        RetranslateUI();
+        return;
+    }
+
+    QWidget::changeEvent(event);
+}
+
+void ConfigureGameList::RetranslateUI() {
+    ui->retranslateUi(this);
+
+    for (int i = 0; i < ui->icon_size_combobox->count(); i++) {
+        ui->icon_size_combobox->setItemText(i, tr(default_icon_sizes[i].second));
+    }
+
+    for (int i = 0; i < ui->row_1_text_combobox->count(); i++) {
+        const QString name = tr(row_text_names[i]);
+
+        ui->row_1_text_combobox->setItemText(i, name);
+        ui->row_2_text_combobox->setItemText(i, name);
+    }
+}
+
+void ConfigureGameList::InitializeIconSizeComboBox() {
+    for (const auto& size : default_icon_sizes) {
+        ui->icon_size_combobox->addItem(size.second, size.first);
+    }
+}
+
+void ConfigureGameList::InitializeRowComboBoxes() {
+    for (std::size_t i = 0; i < row_text_names.size(); ++i) {
+        ui->row_1_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
+        ui->row_2_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
+    }
 }
diff --git a/src/yuzu/configuration/configure_gamelist.h b/src/yuzu/configuration/configure_gamelist.h
index 94fba6373..ff7406c60 100644
--- a/src/yuzu/configuration/configure_gamelist.h
+++ b/src/yuzu/configuration/configure_gamelist.h
@@ -23,6 +23,11 @@ public:
 private:
     void setConfiguration();
 
-private:
+    void changeEvent(QEvent*) override;
+    void RetranslateUI();
+
+    void InitializeIconSizeComboBox();
+    void InitializeRowComboBoxes();
+
     std::unique_ptr<Ui::ConfigureGameList> ui;
 };
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index ee1287028..839d58f59 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <QColorDialog>
 #include "core/core.h"
 #include "core/settings.h"
 #include "ui_configure_graphics.h"
@@ -16,6 +17,14 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
     ui->frame_limit->setEnabled(Settings::values.use_frame_limit);
     connect(ui->toggle_frame_limit, &QCheckBox::stateChanged, ui->frame_limit,
             &QSpinBox::setEnabled);
+    connect(ui->bg_button, &QPushButton::clicked, this, [this] {
+        const QColor new_bg_color = QColorDialog::getColor(bg_color);
+        if (!new_bg_color.isValid())
+            return;
+        bg_color = new_bg_color;
+        ui->bg_button->setStyleSheet(
+            QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
+    });
 }
 
 ConfigureGraphics::~ConfigureGraphics() = default;
@@ -65,6 +74,10 @@ void ConfigureGraphics::setConfiguration() {
     ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
     ui->frame_limit->setValue(Settings::values.frame_limit);
     ui->use_accurate_framebuffers->setChecked(Settings::values.use_accurate_framebuffers);
+    bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
+                                Settings::values.bg_blue);
+    ui->bg_button->setStyleSheet(
+        QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
 }
 
 void ConfigureGraphics::applyConfiguration() {
@@ -73,4 +86,7 @@ void ConfigureGraphics::applyConfiguration() {
     Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
     Settings::values.frame_limit = ui->frame_limit->value();
     Settings::values.use_accurate_framebuffers = ui->use_accurate_framebuffers->isChecked();
+    Settings::values.bg_red = static_cast<float>(bg_color.redF());
+    Settings::values.bg_green = static_cast<float>(bg_color.greenF());
+    Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
 }
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index 5497a55f7..9bda26fd6 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -25,4 +25,5 @@ private:
 
 private:
     std::unique_ptr<Ui::ConfigureGraphics> ui;
+    QColor bg_color;
 };
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 3bc18c26e..8fc00af1b 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -96,6 +96,27 @@
           </item>
          </layout>
         </item>
+         <item>
+           <layout class="QHBoxLayout" name="horizontalLayout_6">
+             <item>
+               <widget class="QLabel" name="bg_label">
+                 <property name="text">
+                   <string>Background Color:</string>
+                 </property>
+               </widget>
+             </item>
+             <item>
+               <widget class="QPushButton" name="bg_button">
+                 <property name="maximumSize">
+                   <size>
+                     <width>40</width>
+                     <height>16777215</height>
+                   </size>
+                 </property>
+               </widget>
+             </item>
+           </layout>
+         </item>
        </layout>
       </widget>
      </item>
diff --git a/src/yuzu/debugger/graphics/graphics_breakpoints.cpp b/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
index fe682b3b8..b5c88f944 100644
--- a/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
+++ b/src/yuzu/debugger/graphics/graphics_breakpoints.cpp
@@ -42,7 +42,8 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const {
                  tr("Finished primitive batch")},
             };
 
-            DEBUG_ASSERT(map.size() == static_cast<size_t>(Tegra::DebugContext::Event::NumEvents));
+            DEBUG_ASSERT(map.size() ==
+                         static_cast<std::size_t>(Tegra::DebugContext::Event::NumEvents));
             return (map.find(event) != map.end()) ? map.at(event) : QString();
         }
 
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 7e37962d5..cbcd5dd5f 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -341,8 +341,8 @@ void GraphicsSurfaceWidget::OnUpdate() {
         // directly...
 
         const auto& registers = gpu.Maxwell3D().regs;
-        const auto& rt = registers.rt[static_cast<size_t>(surface_source) -
-                                      static_cast<size_t>(Source::RenderTarget0)];
+        const auto& rt = registers.rt[static_cast<std::size_t>(surface_source) -
+                                      static_cast<std::size_t>(Source::RenderTarget0)];
 
         surface_address = rt.Address();
         surface_width = rt.width;
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 6c2cd967e..f2a7e23f0 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -117,7 +117,7 @@ QString WaitTreeCallstack::GetText() const {
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() const {
     std::vector<std::unique_ptr<WaitTreeItem>> list;
 
-    constexpr size_t BaseRegister = 29;
+    constexpr std::size_t BaseRegister = 29;
     u64 base_pointer = thread.context.cpu_registers[BaseRegister];
 
     while (base_pointer != 0) {
@@ -213,35 +213,35 @@ QString WaitTreeThread::GetText() const {
     const auto& thread = static_cast<const Kernel::Thread&>(object);
     QString status;
     switch (thread.status) {
-    case ThreadStatus::Running:
+    case Kernel::ThreadStatus::Running:
         status = tr("running");
         break;
-    case ThreadStatus::Ready:
+    case Kernel::ThreadStatus::Ready:
         status = tr("ready");
         break;
-    case ThreadStatus::WaitHLEEvent:
+    case Kernel::ThreadStatus::WaitHLEEvent:
         status = tr("waiting for HLE return");
         break;
-    case ThreadStatus::WaitSleep:
+    case Kernel::ThreadStatus::WaitSleep:
         status = tr("sleeping");
         break;
-    case ThreadStatus::WaitIPC:
+    case Kernel::ThreadStatus::WaitIPC:
         status = tr("waiting for IPC reply");
         break;
-    case ThreadStatus::WaitSynchAll:
-    case ThreadStatus::WaitSynchAny:
+    case Kernel::ThreadStatus::WaitSynchAll:
+    case Kernel::ThreadStatus::WaitSynchAny:
         status = tr("waiting for objects");
         break;
-    case ThreadStatus::WaitMutex:
+    case Kernel::ThreadStatus::WaitMutex:
         status = tr("waiting for mutex");
         break;
-    case ThreadStatus::WaitArb:
+    case Kernel::ThreadStatus::WaitArb:
         status = tr("waiting for address arbiter");
         break;
-    case ThreadStatus::Dormant:
+    case Kernel::ThreadStatus::Dormant:
         status = tr("dormant");
         break;
-    case ThreadStatus::Dead:
+    case Kernel::ThreadStatus::Dead:
         status = tr("dead");
         break;
     }
@@ -254,23 +254,23 @@ QString WaitTreeThread::GetText() const {
 QColor WaitTreeThread::GetColor() const {
     const auto& thread = static_cast<const Kernel::Thread&>(object);
     switch (thread.status) {
-    case ThreadStatus::Running:
+    case Kernel::ThreadStatus::Running:
         return QColor(Qt::GlobalColor::darkGreen);
-    case ThreadStatus::Ready:
+    case Kernel::ThreadStatus::Ready:
         return QColor(Qt::GlobalColor::darkBlue);
-    case ThreadStatus::WaitHLEEvent:
-    case ThreadStatus::WaitIPC:
+    case Kernel::ThreadStatus::WaitHLEEvent:
+    case Kernel::ThreadStatus::WaitIPC:
         return QColor(Qt::GlobalColor::darkRed);
-    case ThreadStatus::WaitSleep:
+    case Kernel::ThreadStatus::WaitSleep:
         return QColor(Qt::GlobalColor::darkYellow);
-    case ThreadStatus::WaitSynchAll:
-    case ThreadStatus::WaitSynchAny:
-    case ThreadStatus::WaitMutex:
-    case ThreadStatus::WaitArb:
+    case Kernel::ThreadStatus::WaitSynchAll:
+    case Kernel::ThreadStatus::WaitSynchAny:
+    case Kernel::ThreadStatus::WaitMutex:
+    case Kernel::ThreadStatus::WaitArb:
         return QColor(Qt::GlobalColor::red);
-    case ThreadStatus::Dormant:
+    case Kernel::ThreadStatus::Dormant:
         return QColor(Qt::GlobalColor::darkCyan);
-    case ThreadStatus::Dead:
+    case Kernel::ThreadStatus::Dead:
         return QColor(Qt::GlobalColor::gray);
     default:
         return WaitTreeItem::GetColor();
@@ -284,13 +284,13 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
 
     QString processor;
     switch (thread.processor_id) {
-    case ThreadProcessorId::THREADPROCESSORID_DEFAULT:
+    case Kernel::ThreadProcessorId::THREADPROCESSORID_DEFAULT:
         processor = tr("default");
         break;
-    case ThreadProcessorId::THREADPROCESSORID_0:
-    case ThreadProcessorId::THREADPROCESSORID_1:
-    case ThreadProcessorId::THREADPROCESSORID_2:
-    case ThreadProcessorId::THREADPROCESSORID_3:
+    case Kernel::ThreadProcessorId::THREADPROCESSORID_0:
+    case Kernel::ThreadProcessorId::THREADPROCESSORID_1:
+    case Kernel::ThreadProcessorId::THREADPROCESSORID_2:
+    case Kernel::ThreadProcessorId::THREADPROCESSORID_3:
         processor = tr("core %1").arg(thread.processor_id);
         break;
     default:
@@ -314,8 +314,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
     else
         list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex")));
 
-    if (thread.status == ThreadStatus::WaitSynchAny ||
-        thread.status == ThreadStatus::WaitSynchAll) {
+    if (thread.status == Kernel::ThreadStatus::WaitSynchAny ||
+        thread.status == Kernel::ThreadStatus::WaitSynchAll) {
         list.push_back(std::make_unique<WaitTreeObjectList>(thread.wait_objects,
                                                             thread.IsSleepingOnWaitAll()));
     }
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 8c6e16d47..3b3b551bb 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -366,7 +366,7 @@ void GameList::LoadCompatibilityList() {
     QJsonDocument json = QJsonDocument::fromJson(string_content.toUtf8());
     QJsonArray arr = json.array();
 
-    for (const QJsonValue& value : arr) {
+    for (const QJsonValueRef& value : arr) {
         QJsonObject game = value.toObject();
 
         if (game.contains("compatibility") && game["compatibility"].isDouble()) {
@@ -374,9 +374,9 @@ void GameList::LoadCompatibilityList() {
             QString directory = game["directory"].toString();
             QJsonArray ids = game["releases"].toArray();
 
-            for (const QJsonValue& value : ids) {
-                QJsonObject object = value.toObject();
-                QString id = object["id"].toString();
+            for (const QJsonValueRef& id_ref : ids) {
+                QJsonObject id_object = id_ref.toObject();
+                QString id = id_object["id"].toString();
                 compatibility_list.emplace(
                     id.toUpper().toStdString(),
                     std::make_pair(QString::number(compatibility), directory));
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index f22e422e5..b6272d536 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -106,7 +106,7 @@ class GameListItemCompat : public GameListItem {
 public:
     static const int CompatNumberRole = Qt::UserRole + 1;
     GameListItemCompat() = default;
-    explicit GameListItemCompat(const QString& compatiblity) {
+    explicit GameListItemCompat(const QString& compatibility) {
         struct CompatStatus {
             QString color;
             const char* text;
@@ -123,13 +123,13 @@ public:
         {"99", {"#000000", QT_TR_NOOP("Not Tested"), QT_TR_NOOP("The game has not yet been tested.")}}};
         // clang-format on
 
-        auto iterator = status_data.find(compatiblity);
+        auto iterator = status_data.find(compatibility);
         if (iterator == status_data.end()) {
-            LOG_WARNING(Frontend, "Invalid compatibility number {}", compatiblity.toStdString());
+            LOG_WARNING(Frontend, "Invalid compatibility number {}", compatibility.toStdString());
             return;
         }
-        CompatStatus status = iterator->second;
-        setData(compatiblity, CompatNumberRole);
+        const CompatStatus& status = iterator->second;
+        setData(compatibility, CompatNumberRole);
         setText(QObject::tr(status.text));
         setToolTip(QObject::tr(status.tooltip));
         setData(CreateCirclePixmapFromColor(status.color), Qt::DecorationRole);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index e36914f14..45bb1d1d1 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -447,6 +447,10 @@ QStringList GMainWindow::GetUnsupportedGLExtensions() {
         unsupported_ext.append("ARB_texture_mirror_clamp_to_edge");
     if (!GLAD_GL_ARB_base_instance)
         unsupported_ext.append("ARB_base_instance");
+    if (!GLAD_GL_ARB_texture_storage)
+        unsupported_ext.append("ARB_texture_storage");
+    if (!GLAD_GL_ARB_multi_bind)
+        unsupported_ext.append("ARB_multi_bind");
 
     // Extensions required to support some texture formats.
     if (!GLAD_GL_EXT_texture_compression_s3tc)
@@ -800,7 +804,7 @@ void GMainWindow::OnMenuInstallToNAND() {
             tr("Cancel"), 0, progress_maximum, this);
         progress.setWindowModality(Qt::WindowModal);
 
-        for (size_t i = 0; i < src->GetSize(); i += buffer.size()) {
+        for (std::size_t i = 0; i < src->GetSize(); i += buffer.size()) {
             if (progress.wasCanceled()) {
                 dest->Resize(0);
                 return false;
diff --git a/src/yuzu/util/util.cpp b/src/yuzu/util/util.cpp
index e99042a23..62c080aff 100644
--- a/src/yuzu/util/util.cpp
+++ b/src/yuzu/util/util.cpp
@@ -30,8 +30,9 @@ QPixmap CreateCirclePixmapFromColor(const QColor& color) {
     QPixmap circle_pixmap(16, 16);
     circle_pixmap.fill(Qt::transparent);
     QPainter painter(&circle_pixmap);
+    painter.setRenderHint(QPainter::Antialiasing);
     painter.setPen(color);
     painter.setBrush(color);
-    painter.drawEllipse(0, 0, 15, 15);
+    painter.drawEllipse({circle_pixmap.width() / 2.0, circle_pixmap.height() / 2.0}, 7.0, 7.0);
     return circle_pixmap;
 }
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index f00b5a66b..991abda2e 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -108,6 +108,8 @@ void Config::ReadValues() {
 
     // Audio
     Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
+    Settings::values.enable_audio_stretching =
+        sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
     Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
     Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1);
 
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6ed9e7962..002a4ec15 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -150,6 +150,12 @@ swap_screen =
 # auto (default): Auto-select, null: No audio output, cubeb: Cubeb audio engine (if available)
 output_engine =
 
+# Whether or not to enable the audio-stretching post-processing effect.
+# This effect adjusts audio speed to match emulation speed and helps prevent audio stutter,
+# at the cost of increasing audio latency.
+# 0: No, 1 (default): Yes
+enable_audio_stretching =
+
 # Which audio device to use.
 # auto (default): Auto-select
 output_device =
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 1c4717123..0733301b2 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -94,6 +94,10 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
         unsupported_ext.push_back("ARB_texture_mirror_clamp_to_edge");
     if (!GLAD_GL_ARB_base_instance)
         unsupported_ext.push_back("ARB_base_instance");
+    if (!GLAD_GL_ARB_texture_storage)
+        unsupported_ext.push_back("ARB_texture_storage");
+    if (!GLAD_GL_ARB_multi_bind)
+        unsupported_ext.push_back("ARB_multi_bind");
 
     // Extensions required to support some texture formats.
     if (!GLAD_GL_EXT_texture_compression_s3tc)