186 files changed, 5330 insertions, 4030 deletions
diff --git a/dist/qt_themes/colorful_dark/icons/index.theme b/dist/qt_themes/colorful_dark/icons/index.theme
index 94d5ae8aa..19dc0369a 100644
--- a/dist/qt_themes/colorful_dark/icons/index.theme
+++ b/dist/qt_themes/colorful_dark/icons/index.theme
@@ -1,7 +1,7 @@
 [Icon Theme]
 Name=colorful_dark
 Comment=Colorful theme (Dark style)
-Inherits=default
+Inherits=colorful
 Directories=16x16
  
 [16x16]
diff --git a/dist/qt_themes/colorful_midnight_blue/icons/index.theme b/dist/qt_themes/colorful_midnight_blue/icons/index.theme
index e23bfe6f9..dcb2c50d6 100644
--- a/dist/qt_themes/colorful_midnight_blue/icons/index.theme
+++ b/dist/qt_themes/colorful_midnight_blue/icons/index.theme
@@ -1,7 +1,7 @@
 [Icon Theme]
 Name=colorful_midnight_blue
 Comment=Colorful theme (Midnight Blue style)
-Inherits=default
+Inherits=colorful
 Directories=16x16
 
 [16x16]
diff --git a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
index 70e540b06..a64037455 100644
--- a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
+++ b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
@@ -1257,10 +1257,6 @@ QComboBox::item:alternate {
   background: #19232D;
 }
 
-QComboBox::item:checked {
-  font-weight: bold;
-}
-
 QComboBox::item:selected {
   border: 0px solid transparent;
 }
diff --git a/dist/yuzu.bmp b/dist/yuzu.bmp
new file mode 100644
index 000000000..66f2f696f
--- /dev/null
+++ b/dist/yuzu.bmp
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d1d177b51..a0ae07752 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -15,6 +15,8 @@ add_library(audio_core STATIC
     command_generator.cpp
     command_generator.h
     common.h
+    delay_line.cpp
+    delay_line.h
     effect_context.cpp
     effect_context.h
     info_updater.cpp
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index 5b1065520..437cc5ccd 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <cmath>
+#include <numbers>
 #include "audio_core/algorithm/interpolate.h"
 #include "audio_core/command_generator.h"
 #include "audio_core/effect_context.h"
@@ -13,6 +15,20 @@ namespace AudioCore {
 namespace {
 constexpr std::size_t MIX_BUFFER_SIZE = 0x3f00;
 constexpr std::size_t SCALED_MIX_BUFFER_SIZE = MIX_BUFFER_SIZE << 15ULL;
+using DelayLineTimes = std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>;
+// Per-line times passed to CalculateDelaySamples (presumably milliseconds -- TODO confirm).
+constexpr DelayLineTimes FDN_MIN_DELAY_LINE_TIMES{5.0f, 6.0f, 13.0f, 14.0f};
+constexpr DelayLineTimes FDN_MAX_DELAY_LINE_TIMES{45.704f, 82.782f, 149.94f, 271.58f};
+constexpr DelayLineTimes DECAY0_MAX_DELAY_LINE_TIMES{17.0f, 13.0f, 9.0f, 7.0f};
+constexpr DelayLineTimes DECAY1_MAX_DELAY_LINE_TIMES{19.0f, 11.0f, 10.0f, 6.0f};
+constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_TAP_TIMES{ // Per-tap time factors
+    0.017136f, 0.059154f, 0.161733f, 0.390186f, 0.425262f, 0.455411f, 0.689737f,
+    0.745910f, 0.833844f, 0.859502f, 0.000000f, 0.075024f, 0.168788f, 0.299901f,
+    0.337443f, 0.371903f, 0.599011f, 0.716741f, 0.817859f, 0.851664f};
+constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{ // Gain applied to each tap
+    0.67096f, 0.61027f, 1.0f,     0.35680f, 0.68361f, 0.65978f, 0.51939f,
+    0.24712f, 0.45945f, 0.45021f, 0.64196f, 0.54879f, 0.92925f, 0.38270f,
+    0.72867f, 0.69794f, 0.5464f,  0.24563f, 0.45214f, 0.44042f};
 
 template <std::size_t N>
 void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
@@ -65,6 +81,154 @@ s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
     }
 }
 
+float Pow10(float x) { // Computes 10^x, saturated to the range [0, 1]
+    if (x >= 0.0f) {
+        return 1.0f; // Non-negative exponents are capped at 1
+    } else if (x <= -5.3f) {
+        return 0.0f; // Exponents at or below -5.3 are flushed to 0
+    }
+    return std::pow(10.0f, x);
+}
+
+float SinD(float degrees) { // Sine of an angle given in degrees
+    return std::sin(degrees * std::numbers::pi_v<float> / 180.0f); // Degrees -> radians first
+}
+
+float CosD(float degrees) { // Cosine of an angle given in degrees
+    return std::cos(degrees * std::numbers::pi_v<float> / 180.0f); // Degrees -> radians first
+}
+
+float ToFloat(s32 sample) { // Converts an integer sample to float, scaled down by 65536 (2^16)
+    return static_cast<float>(sample) / 65536.f;
+}
+
+s32 ToS32(float sample) { // Scales by 65536, clamps to [min, max], then truncates to s32
+    constexpr auto min = -8388608.0f; // -(2^23)
+    constexpr auto max = 8388607.f;   // 2^23 - 1
+    float rescaled_sample = sample * 65536.0f;
+    if (rescaled_sample < min) {
+        rescaled_sample = min;
+    }
+    if (rescaled_sample > max) {
+        rescaled_sample = max;
+    }
+    return static_cast<s32>(rescaled_sample);
+}
+// Output channel index that each of the 20 early taps is accumulated into (1-channel table).
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_1CH{0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+// 2-channel table: every tap lands in channel 0 or 1.
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_2CH{0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
+                                                           1, 1, 1, 0, 0, 0, 0, 1, 1, 1};
+// 4-channel table: taps are spread over channels 0 to 3.
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_4CH{0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+                                                           1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
+// 6-channel table: index 5 is absent because ApplyReverbGeneric adds every tap to it itself.
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+                                                           1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
+
+template <std::size_t CHANNEL_COUNT>
+void ApplyReverbGeneric(I3dl2ReverbState& state,
+                        const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
+                        const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
+                        s32 sample_count) {
+    // Runs the reverb one sample at a time over sample_count samples of CHANNEL_COUNT channels.
+    auto GetTapLookup = []() { // Picks the tap -> output channel table for CHANNEL_COUNT
+        if constexpr (CHANNEL_COUNT == 1) {
+            return REVERB_TAP_INDEX_1CH;
+        } else if constexpr (CHANNEL_COUNT == 2) {
+            return REVERB_TAP_INDEX_2CH;
+        } else if constexpr (CHANNEL_COUNT == 4) {
+            return REVERB_TAP_INDEX_4CH;
+        } else if constexpr (CHANNEL_COUNT == 6) {
+            return REVERB_TAP_INDEX_6CH;
+        }
+    };
+
+    const auto& tap_index_lut = GetTapLookup();
+    for (s32 sample = 0; sample < sample_count; sample++) {
+        std::array<f32, CHANNEL_COUNT> out_samples{}; // Early-tap sum per output channel
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fsamp{}; // Filtered line outputs
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> mixed{}; // After the mixing matrix
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> osamp{}; // Decay stage outputs
+
+        // Mix everything into a single sample
+        s32 temp_mixed_sample = 0;
+        for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+            temp_mixed_sample += input[i][sample];
+        }
+        const auto current_sample = ToFloat(temp_mixed_sample);
+        const auto early_tap = state.early_delay_line.TapOut(state.early_to_late_taps);
+
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_TAPS; i++) {
+            const auto tapped_samp =
+                state.early_delay_line.TapOut(state.early_tap_steps[i]) * EARLY_GAIN[i];
+            out_samples[tap_index_lut[i]] += tapped_samp;
+
+            if constexpr (CHANNEL_COUNT == 6) {
+                // The LFE channel (index 5) receives every early tap
+                out_samples[5] += tapped_samp;
+            }
+        }
+
+        state.lowpass_0 = current_sample * state.lowpass_2 + state.lowpass_0 * state.lowpass_1;
+        state.early_delay_line.Tick(state.lowpass_0); // Filtered input enters the early line
+
+        for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+            out_samples[i] *= state.early_gain;
+        }
+
+        // 2ch scales the mix by the final line's output below, hence declared outside the loop
+        f32 filter{};
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            filter = state.fdn_delay_line[i].GetOutputSample();
+            const auto computed = filter * state.lpf_coefficients[0][i] + state.shelf_filter[i];
+            state.shelf_filter[i] =
+                filter * state.lpf_coefficients[1][i] + computed * state.lpf_coefficients[2][i];
+            fsamp[i] = computed;
+        }
+
+        // Mixing matrix
+        mixed[0] = fsamp[1] + fsamp[2];
+        mixed[1] = -fsamp[0] - fsamp[3];
+        mixed[2] = fsamp[0] - fsamp[3];
+        mixed[3] = fsamp[1] - fsamp[2];
+
+        if constexpr (CHANNEL_COUNT == 2) {
+            for (auto& mix : mixed) {
+                mix *= (filter * state.late_gain);
+            }
+        }
+
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            const auto late = early_tap * state.late_gain;
+            osamp[i] = state.decay_delay_line0[i].Tick(late + mixed[i]); // First all-pass line
+            osamp[i] = state.decay_delay_line1[i].Tick(osamp[i]);        // Second all-pass line
+            state.fdn_delay_line[i].Tick(osamp[i]); // Result is fed back into the delay line
+        }
+
+        if constexpr (CHANNEL_COUNT == 1) { // Output = dry input + early taps + decay outputs
+            output[0][sample] = ToS32(state.dry_gain * ToFloat(input[0][sample]) +
+                                      (out_samples[0] + osamp[0] + osamp[1]));
+        } else if constexpr (CHANNEL_COUNT == 2 || CHANNEL_COUNT == 4) {
+            for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+                output[i][sample] =
+                    ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
+            }
+        } else if constexpr (CHANNEL_COUNT == 6) {
+            const auto temp_center = state.center_delay_line.Tick(0.5f * (osamp[2] - osamp[3]));
+            for (std::size_t i = 0; i < 4; i++) {
+                output[i][sample] =
+                    ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
+            }
+            output[4][sample] =
+                ToS32(state.dry_gain * ToFloat(input[4][sample]) + (out_samples[4] + temp_center));
+            output[5][sample] =
+                ToS32(state.dry_gain * ToFloat(input[5][sample]) + (out_samples[5] + osamp[3]));
+        }
+    }
+}
+
 } // namespace
 
 CommandGenerator::CommandGenerator(AudioCommon::AudioRendererParameter& worker_params_,
@@ -271,11 +435,10 @@ void CommandGenerator::GenerateBiquadFilterCommandForVoice(ServerVoiceInfo& voic
         }
 
         // Generate biquad filter
-        //        GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter,
-        //        dsp_state.biquad_filter_state,
-        //                                    mix_buffer_count + channel, mix_buffer_count +
-        //                                    channel, worker_params.sample_count,
-        //                                    voice_info.GetInParams().node_id);
+        // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter,
+        // dsp_state.biquad_filter_state,
+        //                            mix_buffer_count + channel, mix_buffer_count + channel,
+        //                            worker_params.sample_count, voice_info.GetInParams().node_id);
     }
 }
 
@@ -376,21 +539,54 @@ void CommandGenerator::GenerateEffectCommand(ServerMixInfo& mix_info) {
 
 void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, EffectBase* info,
                                                         bool enabled) {
-    if (!enabled) {
+    auto* reverb = dynamic_cast<EffectI3dl2Reverb*>(info);
+    const auto& params = reverb->GetParams();
+    auto& state = reverb->GetState();
+    const auto channel_count = params.channel_count;
+
+    if (channel_count != 1 && channel_count != 2 && channel_count != 4 && channel_count != 6) {
         return;
     }
-    const auto& params = dynamic_cast<EffectI3dl2Reverb*>(info)->GetParams();
-    const auto channel_count = params.channel_count;
+
+    std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
+    std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
+
+    const auto status = params.status;
     for (s32 i = 0; i < channel_count; i++) {
-        // TODO(ogniK): Actually implement reverb
-        /*
-        if (params.input[i] != params.output[i]) {
-            const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]);
-            auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
-            ApplyMix<1>(output, input, 32768, worker_params.sample_count);
-        }*/
-        auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
-        std::memset(output, 0, worker_params.sample_count * sizeof(s32));
+        input[i] = GetMixBuffer(mix_buffer_offset + params.input[i]);
+        output[i] = GetMixBuffer(mix_buffer_offset + params.output[i]);
+    }
+
+    if (enabled) {
+        if (status == ParameterStatus::Initialized) {
+            InitializeI3dl2Reverb(reverb->GetParams(), state, info->GetWorkBuffer());
+        } else if (status == ParameterStatus::Updating) {
+            UpdateI3dl2Reverb(reverb->GetParams(), state, false);
+        }
+    }
+
+    if (enabled) {
+        switch (channel_count) {
+        case 1:
+            ApplyReverbGeneric<1>(state, input, output, worker_params.sample_count);
+            break;
+        case 2:
+            ApplyReverbGeneric<2>(state, input, output, worker_params.sample_count);
+            break;
+        case 4:
+            ApplyReverbGeneric<4>(state, input, output, worker_params.sample_count);
+            break;
+        case 6:
+            ApplyReverbGeneric<6>(state, input, output, worker_params.sample_count);
+            break;
+        }
+    } else {
+        for (s32 i = 0; i < channel_count; i++) {
+            // Only copy if the buffer input and output do not match!
+            if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
+                std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
+            }
+        }
     }
 }
 
@@ -528,6 +724,133 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
     return sample_count;
 }
 
+void CommandGenerator::InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                                             std::vector<u8>& work_buffer) {
+    // Rebuilds the reverb from scratch. First, reset every piece of state.
+    state.lowpass_0 = 0.0f;
+    state.lowpass_1 = 0.0f;
+    state.lowpass_2 = 0.0f;
+
+    state.early_delay_line.Reset();
+    state.early_tap_steps.fill(0);
+    state.early_gain = 0.0f;
+    state.late_gain = 0.0f;
+    state.early_to_late_taps = 0;
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        state.fdn_delay_line[i].Reset();
+        state.decay_delay_line0[i].Reset();
+        state.decay_delay_line1[i].Reset();
+    }
+    state.last_reverb_echo = 0.0f;
+    state.center_delay_line.Reset();
+    for (auto& coef : state.lpf_coefficients) {
+        coef.fill(0.0f);
+    }
+    state.shelf_filter.fill(0.0f);
+    state.dry_gain = 0.0f;
+
+    const auto sample_rate = info.sample_rate / 1000; // Passed as sample_rate_khz below
+    f32* work_buffer_ptr = reinterpret_cast<f32*>(work_buffer.data());
+    // NOTE(review): nothing here checks work_buffer.size() against the floats handed out below.
+    s32 delay_samples{};
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, FDN_MAX_DELAY_LINE_TIMES[i]);
+        state.fdn_delay_line[i].Initialize(delay_samples, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1; // Each line is given delay_samples + 1 floats
+
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, DECAY0_MAX_DELAY_LINE_TIMES[i]);
+        state.decay_delay_line0[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1;
+
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, DECAY1_MAX_DELAY_LINE_TIMES[i]);
+        state.decay_delay_line1[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1;
+    }
+    delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 5.0f);
+    state.center_delay_line.Initialize(delay_samples, work_buffer_ptr);
+    work_buffer_ptr += delay_samples + 1;
+    // The early delay line takes the final slice of the work buffer.
+    delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 400.0f);
+    state.early_delay_line.Initialize(delay_samples, work_buffer_ptr);
+
+    UpdateI3dl2Reverb(info, state, true); // Applies the parameters and clears every line
+}
+
+void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                                         bool should_clear) {
+    // Recomputes the gains, filter coefficients and delay lengths in state from info.
+    state.dry_gain = info.dry_gain;
+    state.shelf_filter.fill(0.0f);
+    state.lowpass_0 = 0.0f;
+    state.early_gain = Pow10(std::min(info.room + info.reflection, 5000.0f) / 2000.0f);
+    state.late_gain = Pow10(std::min(info.room + info.reverb, 5000.0f) / 2000.0f);
+
+    const auto sample_rate = info.sample_rate / 1000; // Passed as sample_rate_khz below
+    const f32 hf_gain = Pow10(info.room_hf / 2000.0f);
+    if (hf_gain >= 1.0f) { // Input filter becomes a pass-through (gain 1, no feedback)
+        state.lowpass_2 = 1.0f;
+        state.lowpass_1 = 0.0f;
+    } else {
+        const auto a = 1.0f - hf_gain;
+        const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference /
+                                                     static_cast<f32>(info.sample_rate)));
+        const auto c = std::sqrt(b * b - 4.0f * a * a);
+
+        state.lowpass_1 = (b - c) / (2.0f * a);
+        state.lowpass_2 = 1.0f - state.lowpass_1; // Keeps the two coefficients summing to 1
+    }
+    state.early_to_late_taps = AudioCommon::CalculateDelaySamples(
+        sample_rate, 1000.0f * (info.reflection_delay + info.reverb_delay));
+    // NOTE(review): unlike early_tap_steps below, this value is not clamped to GetMaxDelay().
+    state.last_reverb_echo = 0.6f * info.diffusion * 0.01f;
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        const auto length = // Interpolates between the min and max time by density / 100
+            FDN_MIN_DELAY_LINE_TIMES[i] +
+            (info.density / 100.0f) * (FDN_MAX_DELAY_LINE_TIMES[i] - FDN_MIN_DELAY_LINE_TIMES[i]);
+        state.fdn_delay_line[i].SetDelay(AudioCommon::CalculateDelaySamples(sample_rate, length));
+
+        const auto delay_sample_counts = state.fdn_delay_line[i].GetDelay() +
+                                         state.decay_delay_line0[i].GetDelay() +
+                                         state.decay_delay_line1[i].GetDelay();
+
+        float a = (-60.0f * static_cast<f32>(delay_sample_counts)) /
+                  (info.decay_time * static_cast<f32>(info.sample_rate));
+        float b = a / info.hf_decay_ratio;
+        float c = CosD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate)) /
+                  SinD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate));
+        float d = Pow10((b - a) / 40.0f);
+        float e = Pow10((b + a) / 40.0f) * 0.7071f;
+
+        state.lpf_coefficients[0][i] = e * ((d * c) + 1.0f) / (c + d);
+        state.lpf_coefficients[1][i] = e * (1.0f - (d * c)) / (c + d);
+        state.lpf_coefficients[2][i] = (c - d) / (c + d);
+
+        state.decay_delay_line0[i].SetCoefficient(state.last_reverb_echo);
+        state.decay_delay_line1[i].SetCoefficient(-0.9f * state.last_reverb_echo);
+    }
+
+    if (should_clear) { // Zero the contents of every delay line
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            state.fdn_delay_line[i].Clear();
+            state.decay_delay_line0[i].Clear();
+            state.decay_delay_line1[i].Clear();
+        }
+        state.early_delay_line.Clear();
+        state.center_delay_line.Clear();
+    }
+    // Early taps: convert each tap time to samples, limited to the early line's capacity.
+    const auto max_early_delay = state.early_delay_line.GetMaxDelay();
+    const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f);
+    for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
+        const auto length = AudioCommon::CalculateDelaySamples(
+            sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
+        state.early_tap_steps[tap] = std::min(length, max_early_delay);
+    }
+}
+
 void CommandGenerator::GenerateVolumeRampCommand(float last_volume, float current_volume,
                                                  s32 channel, s32 node_id) {
     const auto last = static_cast<s32>(last_volume * 32768.0f);
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index b937350b1..2ebb755b0 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -21,6 +21,8 @@ class ServerMixInfo;
 class EffectContext;
 class EffectBase;
 struct AuxInfoDSP;
+struct I3dl2ReverbParams;
+struct I3dl2ReverbState;
 using MixVolumeBuffer = std::array<float, AudioCommon::MAX_MIX_BUFFERS>;
 
 class CommandGenerator {
@@ -80,6 +82,9 @@ private:
     s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
                       u32 sample_count, u32 read_offset, u32 read_count);
 
+    void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                               std::vector<u8>& work_buffer);
+    void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
     // DSP Code
     s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
                     s32 channel, std::size_t mix_offset);
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index ec59a3ba9..fe546c55d 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -33,6 +33,29 @@ constexpr std::size_t TEMP_MIX_BASE_SIZE = 0x3f00; // TODO(ogniK): Work out this
 // and our const ends up being 0x3f04, the 4 bytes are most
 // likely the sample history
 constexpr std::size_t TOTAL_TEMP_MIX_SIZE = TEMP_MIX_BASE_SIZE + AudioCommon::MAX_SAMPLE_HISTORY;
+constexpr f32 I3DL2REVERB_MAX_LEVEL = 5000.0f; // presumably the cap on summed levels -- TODO confirm
+constexpr f32 I3DL2REVERB_MIN_REFLECTION_DURATION = 0.02f; // presumably seconds -- TODO confirm
+constexpr std::size_t I3DL2REVERB_TAPS = 20; // Number of early-reflection taps
+constexpr std::size_t I3DL2REVERB_DELAY_LINE_COUNT = 4; // Number of delay lines per stage
+using Fractional = s32; // Fixed-point value with 14 fractional bits (scale 0x4000)
+
+template <typename T>
+constexpr Fractional ToFractional(T x) { // Converts x to fixed point by scaling with 0x4000 (2^14)
+    return static_cast<Fractional>(x * static_cast<T>(0x4000)); // Float inputs are truncated
+}
+
+constexpr Fractional MultiplyFractional(Fractional lhs, Fractional rhs) { // Fixed-point multiply
+    return static_cast<Fractional>(static_cast<s64>(lhs) * rhs >> 14); // 64-bit product, rescaled
+}
+
+constexpr s32 FractionalToFixed(Fractional x) { // Rounds a 14-fractional-bit value to an integer
+    const auto s = (x >> 13) & 1; // 1 when the highest discarded bit (one half) is set, else 0
+    return static_cast<s32>(x >> 14) + s; // Integer part, rounded up on a fraction >= 0.5
+}
+
+constexpr s32 CalculateDelaySamples(s32 sample_rate_khz, float time) { // Fixed-point rate * time
+    return FractionalToFixed(MultiplyFractional(ToFractional(sample_rate_khz), ToFractional(time)));
+}
 
 static constexpr u32 VersionFromRevision(u32_le rev) {
     // "REV7" -> 7
diff --git a/src/audio_core/delay_line.cpp b/src/audio_core/delay_line.cpp
new file mode 100644
index 000000000..f4e4dd8d2
--- /dev/null
+++ b/src/audio_core/delay_line.cpp
@@ -0,0 +1,104 @@
+#include <cstring>
+#include "audio_core/delay_line.h"
+
+namespace AudioCore {
+DelayLineBase::DelayLineBase() = default;
+DelayLineBase::~DelayLineBase() = default;
+
+void DelayLineBase::Initialize(s32 max_delay_, float* src_buffer) { // Binds storage and resets
+    buffer = src_buffer;
+    buffer_end = buffer + max_delay_; // Last usable slot (inclusive): max_delay_ + 1 floats
+    max_delay = max_delay_;
+    output = buffer;
+    SetDelay(max_delay_); // Places the write cursor max_delay_ slots ahead of the read cursor
+    Clear();
+}
+
+void DelayLineBase::SetDelay(s32 new_delay) { // Sets the distance between write and read cursor
+    if (max_delay < new_delay) {
+        return; // Requests beyond capacity are ignored; the previous delay stays in effect
+    }
+    delay = new_delay;
+    input = (buffer + ((output - buffer) + new_delay) % (max_delay + 1)); // Wraps within the ring
+}
+
+s32 DelayLineBase::GetDelay() const { // Delay last accepted by SetDelay
+    return delay;
+}
+
+s32 DelayLineBase::GetMaxDelay() const { // Capacity given to Initialize
+    return max_delay;
+}
+
+f32 DelayLineBase::TapOut(s32 last_sample) { // Reads the sample written last_sample + 1 ticks ago
+    const float* ptr = input - (last_sample + 1);
+    if (ptr < buffer) {
+        ptr += (max_delay + 1); // Wrap once; NOTE(review): assumes last_sample <= max_delay
+    }
+
+    return *ptr;
+}
+
+f32 DelayLineBase::Tick(f32 sample) { // Writes one sample and returns the one at the read cursor
+    *(input++) = sample;
+    const auto out_sample = *(output++);
+
+    if (buffer_end < input) { // Moved past the inclusive end: wrap to the start
+        input = buffer;
+    }
+
+    if (buffer_end < output) { // Same wrap for the read cursor
+        output = buffer;
+    }
+
+    return out_sample;
+}
+
+float* DelayLineBase::GetInput() { // Current write cursor
+    return input;
+}
+
+const float* DelayLineBase::GetInput() const { // Current write cursor (read-only)
+    return input;
+}
+
+f32 DelayLineBase::GetOutputSample() const { // Peeks at the read cursor without advancing it
+    return *output;
+}
+
+void DelayLineBase::Clear() { // Zeroes every slot of the ring, leaving the cursors untouched
+    std::memset(buffer, 0, sizeof(float) * (max_delay + 1)); // buffer_end is inclusive: +1 slot
+}
+
+void DelayLineBase::Reset() { // Detaches from the storage; nothing is freed or cleared
+    buffer = nullptr;
+    buffer_end = nullptr;
+    max_delay = 0;
+    input = nullptr;
+    output = nullptr;
+    delay = 0;
+}
+
+DelayLineAllPass::DelayLineAllPass() = default;
+DelayLineAllPass::~DelayLineAllPass() = default;
+
+void DelayLineAllPass::Initialize(u32 delay_, float coeffcient_, f32* src_buffer) {
+    DelayLineBase::Initialize(delay_, src_buffer); // delay_ becomes both capacity and delay
+    SetCoefficient(coeffcient_);
+}
+
+void DelayLineAllPass::SetCoefficient(float coeffcient_) { // Stores the gain used by Tick
+    coefficient = coeffcient_;
+}
+
+f32 DelayLineAllPass::Tick(f32 sample) { // One filter step using the delayed sample at *output
+    const auto temp = sample - coefficient * *output; // Input minus scaled delayed sample
+    return coefficient * temp + DelayLineBase::Tick(temp); // Stores temp, adds the delayed sample
+}
+
+void DelayLineAllPass::Reset() { // Clears the coefficient, then detaches the base delay line
+    coefficient = 0.0f;
+    DelayLineBase::Reset();
+}
+
+} // namespace AudioCore
diff --git a/src/audio_core/delay_line.h b/src/audio_core/delay_line.h
new file mode 100644
index 000000000..cafddd432
--- /dev/null
+++ b/src/audio_core/delay_line.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+class DelayLineBase { // Ring-buffer delay line over caller-provided float storage
+public:
+    DelayLineBase();
+    ~DelayLineBase();
+
+    void Initialize(s32 max_delay_, float* src_buffer); // Uses max_delay_ + 1 floats of src_buffer
+    void SetDelay(s32 new_delay);                       // Ignored when new_delay > max_delay
+    s32 GetDelay() const;
+    s32 GetMaxDelay() const;
+    f32 TapOut(s32 last_sample); // Sample written last_sample + 1 ticks ago
+    f32 Tick(f32 sample);        // Writes sample, returns the sample at the read cursor
+    float* GetInput();
+    const float* GetInput() const;
+    f32 GetOutputSample() const; // Sample at the read cursor, without advancing
+    void Clear();                // Zeroes the storage
+    void Reset();                // Detaches from the storage
+
+protected:
+    float* buffer{nullptr};     // Start of the storage; never freed by this class
+    float* buffer_end{nullptr}; // buffer + max_delay, itself a usable slot
+    s32 max_delay{};            // Capacity in samples
+    float* input{nullptr};      // Write cursor
+    float* output{nullptr};     // Read cursor
+    s32 delay{};                // Current delay in samples
+};
+
+class DelayLineAllPass final : public DelayLineBase { // Delay line with a feedback coefficient
+public:
+    DelayLineAllPass();
+    ~DelayLineAllPass();
+
+    void Initialize(u32 delay, float coeffcient_, f32* src_buffer); // Hides the base overload
+    void SetCoefficient(float coeffcient_);
+    f32 Tick(f32 sample); // Non-virtual: hides DelayLineBase::Tick
+    void Reset();         // Non-virtual: hides DelayLineBase::Reset
+
+private:
+    float coefficient{}; // Gain applied in Tick
+};
+} // namespace AudioCore
diff --git a/src/audio_core/effect_context.cpp b/src/audio_core/effect_context.cpp
index f770b9608..89e4573c7 100644
--- a/src/audio_core/effect_context.cpp
+++ b/src/audio_core/effect_context.cpp
@@ -90,6 +90,14 @@ s32 EffectBase::GetProcessingOrder() const {
     return processing_order;
 }
 
+std::vector<u8>& EffectBase::GetWorkBuffer() { // Mutable access to the effect's work buffer
+    return work_buffer;
+}
+
+const std::vector<u8>& EffectBase::GetWorkBuffer() const { // Read-only access to the same buffer
+    return work_buffer;
+}
+
 EffectI3dl2Reverb::EffectI3dl2Reverb() : EffectGeneric(EffectType::I3dl2Reverb) {}
 EffectI3dl2Reverb::~EffectI3dl2Reverb() = default;
 
@@ -117,6 +125,12 @@ void EffectI3dl2Reverb::Update(EffectInfo::InParams& in_params) {
         usage = UsageState::Initialized;
         params.status = ParameterStatus::Initialized;
         skipped = in_params.buffer_address == 0 || in_params.buffer_size == 0;
+        if (!skipped) {
+            auto& cur_work_buffer = GetWorkBuffer();
+            // Has two buffers internally
+            cur_work_buffer.resize(in_params.buffer_size * 2);
+            std::fill(cur_work_buffer.begin(), cur_work_buffer.end(), 0);
+        }
     }
 }
 
@@ -129,6 +143,14 @@ void EffectI3dl2Reverb::UpdateForCommandGeneration() {
     GetParams().status = ParameterStatus::Updated;
 }
 
+I3dl2ReverbState& EffectI3dl2Reverb::GetState() { // Mutable access to the reverb state
+    return state;
+}
+
+const I3dl2ReverbState& EffectI3dl2Reverb::GetState() const { // Read-only access to the state
+    return state;
+}
+
 EffectBiquadFilter::EffectBiquadFilter() : EffectGeneric(EffectType::BiquadFilter) {}
 EffectBiquadFilter::~EffectBiquadFilter() = default;
 
diff --git a/src/audio_core/effect_context.h b/src/audio_core/effect_context.h
index c5e0b398c..5e0655dd7 100644
--- a/src/audio_core/effect_context.h
+++ b/src/audio_core/effect_context.h
@@ -8,6 +8,7 @@
 #include <memory>
 #include <vector>
 #include "audio_core/common.h"
+#include "audio_core/delay_line.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
@@ -194,6 +195,8 @@ public:
     [[nodiscard]] bool IsEnabled() const;
     [[nodiscard]] s32 GetMixID() const;
     [[nodiscard]] s32 GetProcessingOrder() const;
+    [[nodiscard]] std::vector<u8>& GetWorkBuffer();
+    [[nodiscard]] const std::vector<u8>& GetWorkBuffer() const;
 
 protected:
     UsageState usage{UsageState::Invalid};
@@ -201,6 +204,7 @@ protected:
     s32 mix_id{};
     s32 processing_order{};
     bool enabled = false;
+    std::vector<u8> work_buffer{};
 };
 
 template <typename T>
@@ -212,7 +216,7 @@ public:
         return internal_params;
     }
 
-    const I3dl2ReverbParams& GetParams() const {
+    const T& GetParams() const {
         return internal_params;
     }
 
@@ -229,6 +233,27 @@ public:
     void UpdateForCommandGeneration() override;
 };
 
+struct I3dl2ReverbState { // Per-effect state used while processing the I3DL2 reverb
+    f32 lowpass_0{}; // Input filter state (its previous output)
+    f32 lowpass_1{}; // Coefficient applied to lowpass_0
+    f32 lowpass_2{}; // Coefficient applied to the incoming sample
+
+    DelayLineBase early_delay_line{};
+    std::array<u32, AudioCommon::I3DL2REVERB_TAPS> early_tap_steps{}; // Tap delays in samples
+    f32 early_gain{}; // Gain applied to the summed early taps
+    f32 late_gain{};  // Gain applied to the tap feeding the decay lines
+
+    u32 early_to_late_taps{}; // Delay (samples) of the tap feeding the decay lines
+    std::array<DelayLineBase, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fdn_delay_line{};
+    std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line0{};
+    std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line1{};
+    f32 last_reverb_echo{}; // Base value for the decay line coefficients
+    DelayLineBase center_delay_line{}; // Ticked only in the 6-channel processing path
+    std::array<std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>, 3> lpf_coefficients{};
+    std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> shelf_filter{}; // Filter memory
+    f32 dry_gain{}; // Gain applied to the unprocessed input
+};
+
 class EffectI3dl2Reverb : public EffectGeneric<I3dl2ReverbParams> {
 public:
     explicit EffectI3dl2Reverb();
@@ -237,8 +262,12 @@ public:
     void Update(EffectInfo::InParams& in_params) override;
     void UpdateForCommandGeneration() override;
 
+    I3dl2ReverbState& GetState();
+    const I3dl2ReverbState& GetState() const;
+
 private:
     bool skipped = false;
+    I3dl2ReverbState state{};
 };
 
 class EffectBiquadFilter : public EffectGeneric<BiquadFilterParams> {
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bfd11e76d..b657506b1 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -168,7 +168,6 @@ add_library(common STATIC
     time_zone.cpp
     time_zone.h
     tree.h
-    uint128.cpp
     uint128.h
     uuid.cpp
     uuid.h
@@ -206,6 +205,8 @@ if (MSVC)
 else()
   target_compile_options(common PRIVATE
     -Werror
+
+    $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation>
   )
 endif()
 
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
deleted file mode 100644
index 16bf7c828..000000000
--- a/src/common/uint128.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#ifdef _MSC_VER
-#include <intrin.h>
-
-#pragma intrinsic(_umul128)
-#pragma intrinsic(_udiv128)
-#endif
-#include <cstring>
-#include "common/uint128.h"
-
-namespace Common {
-
-#ifdef _MSC_VER
-
-u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
-    u128 r{};
-    r[0] = _umul128(a, b, &r[1]);
-    u64 remainder;
-#if _MSC_VER < 1923
-    return udiv128(r[1], r[0], d, &remainder);
-#else
-    return _udiv128(r[1], r[0], d, &remainder);
-#endif
-}
-
-#else
-
-u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
-    const u64 diva = a / d;
-    const u64 moda = a % d;
-    const u64 divb = b / d;
-    const u64 modb = b % d;
-    return diva * b + moda * divb + moda * modb / d;
-}
-
-#endif
-
-u128 Multiply64Into128(u64 a, u64 b) {
-    u128 result;
-#ifdef _MSC_VER
-    result[0] = _umul128(a, b, &result[1]);
-#else
-    unsigned __int128 tmp = a;
-    tmp *= b;
-    std::memcpy(&result, &tmp, sizeof(u128));
-#endif
-    return result;
-}
-
-std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
-    u64 remainder = dividend[0] % divisor;
-    u64 accum = dividend[0] / divisor;
-    if (dividend[1] == 0)
-        return {accum, remainder};
-    // We ignore dividend[1] / divisor as that overflows
-    const u64 first_segment = (dividend[1] % divisor) << 32;
-    accum += (first_segment / divisor) << 32;
-    const u64 second_segment = (first_segment % divisor) << 32;
-    accum += (second_segment / divisor);
-    remainder += second_segment % divisor;
-    if (remainder >= divisor) {
-        accum++;
-        remainder -= divisor;
-    }
-    return {accum, remainder};
-}
-
-} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
index 969259ab6..83560a9ce 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -4,19 +4,107 @@
 
 #pragma once
 
+#include <array>
+#include <cstring>
+#include <limits>
 #include <utility>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#pragma intrinsic(__umulh)
+#pragma intrinsic(_umul128)
+#pragma intrinsic(_udiv128)
+#elif defined(__x86_64__) || defined(__i386__)
+// <x86intrin.h> is only usable when targeting x86; the functions below do not require it.
+#include <x86intrin.h>
+#endif
+
 #include "common/common_types.h"
 
 namespace Common {
 
 // This function multiplies 2 u64 values and divides it by a u64 value.
-[[nodiscard]] u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
+// NOTE: outside of MSVC the partial product (a % d) * (b % d) is computed in 64 bits, so the
+// result is only exact while that product does not wrap (always true when d fits in 32 bits).
+[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+#ifdef _MSC_VER
+    u128 r{};
+    r[0] = _umul128(a, b, &r[1]);
+    u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], d, &remainder);
+#else
+    return _udiv128(r[1], r[0], d, &remainder);
+#endif
+#else
+    const u64 diva = a / d;
+    const u64 moda = a % d;
+    const u64 divb = b / d;
+    const u64 modb = b % d;
+    return diva * b + moda * divb + moda * modb / d;
+#endif
+}
 
 // This function multiplies 2 u64 values and produces a u128 value;
-[[nodiscard]] u128 Multiply64Into128(u64 a, u64 b);
+[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) {
+    u128 result;
+#ifdef _MSC_VER
+    result[0] = _umul128(a, b, &result[1]);
+#else
+    unsigned __int128 tmp = a;
+    tmp *= b;
+    std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+    return result;
+}
+
+// Computes numerator / divisor as a 0.64 fixed-point factor for use with MultiplyHigh, i.e.
+// floor(numerator * 2^64 / divisor) on the 128-bit paths. The quotient only fits in 64 bits
+// when numerator < divisor; the __int128 path truncates it otherwise, so pre-reduce larger ratios.
+[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
+#ifdef __SIZEOF_INT128__
+    const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
+    return static_cast<u64>(base / divisor);
+#elif defined(_M_X64) || defined(_M_ARM64)
+    std::array<u64, 2> r = {0, numerator};
+    u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], divisor, &remainder);
+#else
+    return _udiv128(r[1], r[0], divisor, &remainder);
+#endif
+#else
+    // This one is bit more inaccurate.
+    return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
+#endif
+}
+
+// Returns the upper 64 bits of the full 128-bit product a * b.
+[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) {
+#ifdef __SIZEOF_INT128__
+    return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
+#elif defined(_M_X64) || defined(_M_ARM64)
+    return __umulh(a, b); // MSVC
+#else
+    // Generic fallback
+    const u64 a_lo = u32(a);
+    const u64 a_hi = a >> 32;
+    const u64 b_lo = u32(b);
+    const u64 b_hi = b >> 32;
+
+    const u64 a_x_b_hi = a_hi * b_hi;
+    const u64 a_x_b_mid = a_hi * b_lo;
+    const u64 b_x_a_mid = b_hi * a_lo;
+    const u64 a_x_b_lo = a_lo * b_lo;
+
+    const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
+                           static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
+                          32;
+
+    const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
 
-// This function divides a u128 by a u32 value and produces two u64 values:
-// the result of division and the remainder
-[[nodiscard]] std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+    return multhi;
+#endif
+}
 
 } // namespace Common
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index a8c143f85..1545993bd 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <cstdint>
+
 #include "common/uint128.h"
 #include "common/wall_clock.h"
 
@@ -18,7 +20,9 @@ using base_time_point = std::chrono::time_point<base_timer>;
 class StandardWallClock final : public WallClock {
 public:
     explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
-        : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) {
+        : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false),
+          emulated_clock_factor{FractionalFactor(emulated_clock_frequency)},
+          emulated_cpu_factor{FractionalFactor(emulated_cpu_frequency)} {
         start_time = base_timer::now();
     }
 
@@ -41,16 +45,11 @@ public:
     }
 
     u64 GetClockCycles() override {
-        std::chrono::nanoseconds time_now = GetTimeNS();
-        const u128 temporary =
-            Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
-        return Common::Divide128On32(temporary, 1000000000).first;
+        return NsToCycles(GetTimeNS().count(), emulated_clock_frequency, emulated_clock_factor);
     }
 
     u64 GetCPUCycles() override {
-        std::chrono::nanoseconds time_now = GetTimeNS();
-        const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
-        return Common::Divide128On32(temporary, 1000000000).first;
+        return NsToCycles(GetTimeNS().count(), emulated_cpu_frequency, emulated_cpu_factor);
     }
 
     void Pause([[maybe_unused]] bool is_paused) override {
@@ -59,6 +58,22 @@ public:
 
 private:
+    static constexpr u64 NS_PER_SECOND = 1000000000;
+
+    // GetFixedPoint64Factor can only encode ratios below 1, so only frequency % NS_PER_SECOND
+    // goes into the 0.64 fixed-point factor; NsToCycles adds the whole part back.
+    [[nodiscard]] static u64 FractionalFactor(u64 frequency) {
+        return GetFixedPoint64Factor(frequency % NS_PER_SECOND, NS_PER_SECOND);
+    }
+
+    // Scales a nanosecond count by frequency / NS_PER_SECOND: whole cycles per nanosecond are
+    // applied directly, the fractional remainder through its precomputed fixed-point factor.
+    [[nodiscard]] static u64 NsToCycles(u64 ns, u64 frequency, u64 fractional_factor) {
+        return ns * (frequency / NS_PER_SECOND) + MultiplyHigh(ns, fractional_factor);
+    }
+
     base_time_point start_time;
+    const u64 emulated_clock_factor;
+    const u64 emulated_cpu_factor;
 };
 
 #ifdef ARCHITECTURE_x86_64
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index a65f6b832..87de40624 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -8,68 +8,10 @@
 #include <mutex>
 #include <thread>
 
-#ifdef _MSC_VER
-#include <intrin.h>
-
-#pragma intrinsic(__umulh)
-#pragma intrinsic(_udiv128)
-#else
-#include <x86intrin.h>
-#endif
-
 #include "common/atomic_ops.h"
 #include "common/uint128.h"
 #include "common/x64/native_clock.h"
 
-namespace {
-
-[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
-#ifdef __SIZEOF_INT128__
-    const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
-    return static_cast<u64>(base / divisor);
-#elif defined(_M_X64) || defined(_M_ARM64)
-    std::array<u64, 2> r = {0, numerator};
-    u64 remainder;
-#if _MSC_VER < 1923
-    return udiv128(r[1], r[0], divisor, &remainder);
-#else
-    return _udiv128(r[1], r[0], divisor, &remainder);
-#endif
-#else
-    // This one is bit more inaccurate.
-    return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
-#endif
-}
-
-[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
-#ifdef __SIZEOF_INT128__
-    return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
-#elif defined(_M_X64) || defined(_M_ARM64)
-    return __umulh(a, b); // MSVC
-#else
-    // Generic fallback
-    const u64 a_lo = u32(a);
-    const u64 a_hi = a >> 32;
-    const u64 b_lo = u32(b);
-    const u64 b_hi = b >> 32;
-
-    const u64 a_x_b_hi = a_hi * b_hi;
-    const u64 a_x_b_mid = a_hi * b_lo;
-    const u64 b_x_a_mid = b_hi * a_lo;
-    const u64 a_x_b_lo = a_lo * b_lo;
-
-    const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
-                           static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
-                          32;
-
-    const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
-
-    return multhi;
-#endif
-}
-
-} // namespace
-
 namespace Common {
 
 u64 EstimateRDTSCFrequency() {
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 386d7bddf..c6bdf72ec 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -19,7 +19,6 @@ add_library(core STATIC
     core.h
     core_timing.cpp
     core_timing.h
-    core_timing_util.cpp
     core_timing_util.h
     cpu_manager.cpp
     cpu_manager.h
@@ -148,7 +147,7 @@ add_library(core STATIC
     hle/kernel/client_session.h
     hle/kernel/code_set.cpp
     hle/kernel/code_set.h
-    hle/kernel/errors.h
+    hle/kernel/svc_results.h
     hle/kernel/global_scheduler_context.cpp
     hle/kernel/global_scheduler_context.h
     hle/kernel/handle_table.cpp
@@ -174,6 +173,7 @@ add_library(core STATIC
     hle/kernel/k_scheduler.h
     hle/kernel/k_scheduler_lock.h
     hle/kernel/k_scoped_lock.h
+    hle/kernel/k_scoped_resource_reservation.h
     hle/kernel/k_scoped_scheduler_lock_and_sleep.h
     hle/kernel/k_synchronization_object.cpp
     hle/kernel/k_synchronization_object.h
@@ -223,7 +223,6 @@ add_library(core STATIC
     hle/kernel/svc.cpp
     hle/kernel/svc.h
     hle/kernel/svc_common.h
-    hle/kernel/svc_results.h
     hle/kernel/svc_types.h
     hle/kernel/svc_wrap.h
     hle/kernel/time_manager.cpp
@@ -266,6 +265,7 @@ add_library(core STATIC
     hle/service/am/applets/software_keyboard.h
     hle/service/am/applets/web_browser.cpp
     hle/service/am/applets/web_browser.h
+    hle/service/am/applets/web_types.h
     hle/service/am/idle.cpp
     hle/service/am/idle.h
     hle/service/am/omm.cpp
@@ -400,6 +400,7 @@ add_library(core STATIC
     hle/service/hid/controllers/xpad.h
     hle/service/lbl/lbl.cpp
     hle/service/lbl/lbl.h
+    hle/service/ldn/errors.h
     hle/service/ldn/ldn.cpp
     hle/service/ldn/ldn.h
     hle/service/ldr/ldr.cpp
@@ -653,6 +654,8 @@ else()
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
 
+        $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation>
+
         -Wno-sign-conversion
     )
 endif()
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
deleted file mode 100644
index 8ce8e602e..000000000
--- a/src/core/core_timing_util.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#include "core/core_timing_util.h"
-
-#include <cinttypes>
-#include <limits>
-#include "common/logging/log.h"
-#include "common/uint128.h"
-#include "core/hardware_properties.h"
-
-namespace Core::Timing {
-
-constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE;
-
-s64 msToCycles(std::chrono::milliseconds ms) {
-    if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
-        return std::numeric_limits<s64>::max();
-    }
-    if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000);
-    }
-    return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000;
-}
-
-s64 usToCycles(std::chrono::microseconds us) {
-    if (static_cast<u64>(us.count() / 1000000) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
-        return std::numeric_limits<s64>::max();
-    }
-    if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000);
-    }
-    return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000;
-}
-
-s64 nsToCycles(std::chrono::nanoseconds ns) {
-    const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
-    return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
-}
-
-u64 msToClockCycles(std::chrono::milliseconds ns) {
-    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
-    return Common::Divide128On32(temp, 1000).first;
-}
-
-u64 usToClockCycles(std::chrono::microseconds ns) {
-    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
-    return Common::Divide128On32(temp, 1000000).first;
-}
-
-u64 nsToClockCycles(std::chrono::nanoseconds ns) {
-    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
-    return Common::Divide128On32(temp, 1000000000).first;
-}
-
-u64 CpuCyclesToClockCycles(u64 ticks) {
-    const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
-    return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-}
-
-std::chrono::milliseconds CyclesToMs(s64 cycles) {
-    const u128 temporal = Common::Multiply64Into128(cycles, 1000);
-    u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-    return std::chrono::milliseconds(ms);
-}
-
-std::chrono::nanoseconds CyclesToNs(s64 cycles) {
-    const u128 temporal = Common::Multiply64Into128(cycles, 1000000000);
-    u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-    return std::chrono::nanoseconds(ns);
-}
-
-std::chrono::microseconds CyclesToUs(s64 cycles) {
-    const u128 temporal = Common::Multiply64Into128(cycles, 1000000);
-    u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-    return std::chrono::microseconds(us);
-}
-
-} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index e4a046bf9..14c36a485 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -1,24 +1,75 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
 
 #include <chrono>
+
 #include "common/common_types.h"
+#include "core/hardware_properties.h"
 
 namespace Core::Timing {
 
-s64 msToCycles(std::chrono::milliseconds ms);
-s64 usToCycles(std::chrono::microseconds us);
-s64 nsToCycles(std::chrono::nanoseconds ns);
-u64 msToClockCycles(std::chrono::milliseconds ns);
-u64 usToClockCycles(std::chrono::microseconds ns);
-u64 nsToClockCycles(std::chrono::nanoseconds ns);
-std::chrono::milliseconds CyclesToMs(s64 cycles);
-std::chrono::nanoseconds CyclesToNs(s64 cycles);
-std::chrono::microseconds CyclesToUs(s64 cycles);
-
-u64 CpuCyclesToClockCycles(u64 ticks);
+namespace detail {
+constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000;
+constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000;
+
+// Computes floor(value * numerator / denominator) without forming the full product. Splitting
+// value into quotient and remainder is exact, and the largest intermediate stays below
+// numerator * denominator (assumed to fit in 64 bits for the scale factors used in this file).
+[[nodiscard]] constexpr u64 MulDiv(u64 value, u64 numerator, u64 denominator) {
+    return (value / denominator) * numerator + (value % denominator) * numerator / denominator;
+}
+} // namespace detail
+
+// Duration -> CPU cycle conversions, scaled by BASE_CLOCK_RATE_ADJUSTED. The scaled variants
+// treat the tick count as unsigned, so negative durations are not meaningful inputs.
+[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) {
+    return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED;
+}
+
+[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) {
+    return static_cast<s64>(
+        detail::MulDiv(static_cast<u64>(us.count()), detail::BASE_CLOCK_RATE_ADJUSTED, 1000));
+}
+
+[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) {
+    return static_cast<s64>(
+        detail::MulDiv(static_cast<u64>(ns.count()), detail::BASE_CLOCK_RATE_ADJUSTED, 1000000));
+}
+
+// Duration -> clock cycle conversions, scaled by CNTFREQ_ADJUSTED.
+[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) {
+    return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED;
+}
+
+[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) {
+    return detail::MulDiv(static_cast<u64>(us.count()), detail::CNTFREQ_ADJUSTED, 1000);
+}
+
+[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) {
+    return detail::MulDiv(static_cast<u64>(ns.count()), detail::CNTFREQ_ADJUSTED, 1000000);
+}
+
+// Rescales a CPU cycle count by CNTFREQ_ADJUSTED / BASE_CLOCK_RATE_ADJUSTED.
+[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) {
+    return detail::MulDiv(ticks, detail::CNTFREQ_ADJUSTED, detail::BASE_CLOCK_RATE_ADJUSTED);
+}
+
+// CPU cycle -> duration conversions; results are truncated towards zero.
+[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) {
+    return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED);
+}
+
+[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) {
+    return std::chrono::nanoseconds(
+        detail::MulDiv(static_cast<u64>(cycles), 1000000, detail::BASE_CLOCK_RATE_ADJUSTED));
+}
+
+[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) {
+    return std::chrono::microseconds(
+        detail::MulDiv(static_cast<u64>(cycles), 1000, detail::BASE_CLOCK_RATE_ADJUSTED));
+}
 
 } // namespace Core::Timing
diff --git a/src/core/frontend/applets/controller.h b/src/core/frontend/applets/controller.h
index dff71d8d9..b0626a0f9 100644
--- a/src/core/frontend/applets/controller.h
+++ b/src/core/frontend/applets/controller.h
@@ -31,6 +31,7 @@ struct ControllerParameters {
     bool allow_dual_joycons{};
     bool allow_left_joycon{};
     bool allow_right_joycon{};
+    bool allow_gamecube_controller{};
 };
 
 class ControllerApplet {
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index f8f005f15..0b6957e31 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -4,11 +4,11 @@
 
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel {
 
@@ -21,7 +21,7 @@ std::shared_ptr<ServerPort> ClientPort::GetServerPort() const {
 
 ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
     if (active_sessions >= max_sessions) {
-        return ERR_MAX_CONNECTIONS_REACHED;
+        return ResultMaxConnectionsReached;
     }
     active_sessions++;
 
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index a2be1a8f6..e230f365a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -3,11 +3,11 @@
 // Refer to the license.txt file included.
 
 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/result.h"
 
 namespace Kernel {
@@ -43,7 +43,7 @@ ResultCode ClientSession::SendSyncRequest(std::shared_ptr<KThread> thread,
                                           Core::Timing::CoreTiming& core_timing) {
     // Keep ServerSession alive until we're done working with it.
     if (!parent->Server()) {
-        return ERR_SESSION_CLOSED_BY_REMOTE;
+        return ResultSessionClosedByRemote;
     }
 
     // Signal the server session that new data is available
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
deleted file mode 100644
index 7d32a39f0..000000000
--- a/src/core/hle/kernel/errors.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "core/hle/result.h"
-
-namespace Kernel {
-
-// Confirmed Switch kernel error codes
-
-constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
-constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
-constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
-constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
-constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
-constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
-constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
-constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
-constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110};
-constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113};
-constexpr ResultCode ERR_INVALID_THREAD_PRIORITY{ErrorModule::Kernel, 112};
-constexpr ResultCode ERR_INVALID_HANDLE{ErrorModule::Kernel, 114};
-constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115};
-constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116};
-constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117};
-constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119};
-constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120};
-constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121};
-constexpr ResultCode ERR_BUSY{ErrorModule::Kernel, 122};
-constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE{ErrorModule::Kernel, 123};
-constexpr ResultCode ERR_INVALID_STATE{ErrorModule::Kernel, 125};
-constexpr ResultCode ERR_RESERVED_VALUE{ErrorModule::Kernel, 126};
-constexpr ResultCode ERR_RESOURCE_LIMIT_EXCEEDED{ErrorModule::Kernel, 132};
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 1a2fa9cd8..f96d34078 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -6,12 +6,12 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel {
 namespace {
@@ -33,7 +33,7 @@ HandleTable::~HandleTable() = default;
 ResultCode HandleTable::SetSize(s32 handle_table_size) {
     if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
         LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT);
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     // Values less than or equal to zero indicate to use the maximum allowable
@@ -53,7 +53,7 @@ ResultVal<Handle> HandleTable::Create(std::shared_ptr<Object> obj) {
     const u16 slot = next_free_slot;
     if (slot >= table_size) {
         LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
-        return ERR_HANDLE_TABLE_FULL;
+        return ResultHandleTableFull;
     }
     next_free_slot = generations[slot];
 
@@ -76,7 +76,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
     std::shared_ptr<Object> object = GetGeneric(handle);
     if (object == nullptr) {
         LOG_ERROR(Kernel, "Tried to duplicate invalid handle: {:08X}", handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
     return Create(std::move(object));
 }
@@ -84,7 +84,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 ResultCode HandleTable::Close(Handle handle) {
     if (!IsValid(handle)) {
         LOG_ERROR(Kernel, "Handle is not valid! handle={:08X}", handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     const u16 slot = GetSlot(handle);
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 7ec62cf18..161d9f782 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -14,7 +14,6 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/k_readable_event.h"
@@ -26,6 +25,7 @@
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/memory.h"
 
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index d0e90fd60..7018f56da 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -120,10 +120,10 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
         s32 user_value{};
         if (!UpdateIfEqual(system, &user_value, addr, value, value + 1)) {
             LOG_ERROR(Kernel, "Invalid current memory!");
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
         }
         if (user_value != value) {
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
         }
 
         auto it = thread_tree.nfind_light({addr, -1});
@@ -189,10 +189,10 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32
 
         if (!succeeded) {
             LOG_ERROR(Kernel, "Invalid current memory!");
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
         }
         if (user_value != value) {
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
         }
 
         while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
@@ -221,11 +221,11 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
         // Check that the thread isn't terminating.
         if (cur_thread->IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
 
         // Read the value from userspace.
         s32 user_value{};
@@ -238,19 +238,19 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
 
         if (!succeeded) {
             slp.CancelSleep();
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
         }
 
         // Check that the value is less than the specified one.
         if (user_value >= value) {
             slp.CancelSleep();
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
         }
 
         // Check that the timeout is non-zero.
         if (timeout == 0) {
             slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
         }
 
         // Set the arbiter.
@@ -288,29 +288,29 @@ ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) {
         // Check that the thread isn't terminating.
         if (cur_thread->IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
 
         // Read the value from userspace.
         s32 user_value{};
         if (!ReadFromUser(system, &user_value, addr)) {
             slp.CancelSleep();
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
         }
 
         // Check that the value is equal.
         if (value != user_value) {
             slp.CancelSleep();
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
         }
 
         // Check that the timeout is non-zero.
         if (timeout == 0) {
             slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
         }
 
         // Set the arbiter.
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index f0ad8b390..170d8fa0d 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -92,10 +92,10 @@ ResultCode KConditionVariable::SignalToAddress(VAddr addr) {
         // Write the value to userspace.
         if (!WriteToUser(system, addr, std::addressof(next_value))) {
             if (next_owner_thread) {
-                next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+                next_owner_thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
             }
 
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
         }
     }
 
@@ -114,20 +114,20 @@ ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 val
             cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
 
             // Check if the thread should terminate.
-            R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested);
+            R_UNLESS(!cur_thread->IsTerminationRequested(), ResultTerminationRequested);
 
             {
                 // Read the tag from userspace.
                 u32 test_tag{};
                 R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr),
-                         Svc::ResultInvalidCurrentMemory);
+                         ResultInvalidCurrentMemory);
 
                 // If the tag isn't the handle (with wait mask), we're done.
                 R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS);
 
                 // Get the lock owner thread.
                 owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<KThread>(handle);
-                R_UNLESS(owner_thread, Svc::ResultInvalidHandle);
+                R_UNLESS(owner_thread, ResultInvalidHandle);
 
                 // Update the lock.
                 cur_thread->SetAddressKey(addr, value);
@@ -191,13 +191,13 @@ KThread* KConditionVariable::SignalImpl(KThread* thread) {
                 thread_to_close = owner_thread.get();
             } else {
                 // The lock was tagged with a thread that doesn't exist.
-                thread->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+                thread->SetSyncedObject(nullptr, ResultInvalidState);
                 thread->Wakeup();
             }
         }
     } else {
         // If the address wasn't accessible, note so.
-        thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+        thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
         thread->Wakeup();
     }
 
@@ -263,12 +263,12 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
         KScopedSchedulerLockAndSleep slp{kernel, cur_thread, timeout};
 
         // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
 
         // Check that the thread isn't terminating.
         if (cur_thread->IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Update the value and process for the next owner.
@@ -302,7 +302,7 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
             // Write the value to userspace.
             if (!WriteToUser(system, addr, std::addressof(next_value))) {
                 slp.CancelSleep();
-                return Svc::ResultInvalidCurrentMemory;
+                return ResultInvalidCurrentMemory;
             }
         }
 
diff --git a/src/core/hle/kernel/k_readable_event.cpp b/src/core/hle/kernel/k_readable_event.cpp
index d8a42dbaf..4b4d34857 100644
--- a/src/core/hle/kernel/k_readable_event.cpp
+++ b/src/core/hle/kernel/k_readable_event.cpp
@@ -6,7 +6,6 @@
 #include "common/assert.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_thread.h"
@@ -47,7 +46,7 @@ ResultCode KReadableEvent::Reset() {
     KScopedSchedulerLock lk{kernel};
 
     if (!is_signaled) {
-        return Svc::ResultInvalidState;
+        return ResultInvalidState;
     }
 
     is_signaled = false;
diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp
index ab2ab683f..d7a4a38e6 100644
--- a/src/core/hle/kernel/k_resource_limit.cpp
+++ b/src/core/hle/kernel/k_resource_limit.cpp
@@ -75,7 +75,7 @@ s64 KResourceLimit::GetFreeValue(LimitableResource which) const {
 ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) {
     const auto index = static_cast<std::size_t>(which);
     KScopedLightLock lk(lock);
-    R_UNLESS(current_values[index] <= value, Svc::ResultInvalidState);
+    R_UNLESS(current_values[index] <= value, ResultInvalidState);
 
     limit_values[index] = value;
 
diff --git a/src/core/hle/kernel/k_scoped_resource_reservation.h b/src/core/hle/kernel/k_scoped_resource_reservation.h
new file mode 100644
index 000000000..c5deca00b
--- /dev/null
+++ b/src/core/hle/kernel/k_scoped_resource_reservation.h
@@ -0,0 +1,86 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file references various implementation details from Atmosphere, an open-source firmware for
+// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+
+#include "common/common_types.h"
+#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/process.h"
+
+namespace Kernel {
+
+/// Scope guard around a KResourceLimit reservation.
+///
+/// The constructor attempts to reserve `v` units of resource `r`. Unless Commit() is called, the
+/// destructor hands a successful reservation back through KResourceLimit::Release().
+///
+/// A null resource limit or a zero amount skips the Reserve() call and counts as success.
+class KScopedResourceReservation {
+public:
+    /// Reserves `v` units of `r` from `l`, forwarding `timeout` to KResourceLimit::Reserve().
+    explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+                                        s64 v, s64 timeout)
+        : resource_limit(std::move(l)), value(v), resource(r) {
+        if (resource_limit && value) {
+            success = resource_limit->Reserve(resource, value, timeout);
+        } else {
+            success = true;
+        }
+    }
+
+    /// Reserves `v` units (default 1) of `r` from `l` without passing an explicit timeout.
+    explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+                                        s64 v = 1)
+        : resource_limit(std::move(l)), value(v), resource(r) {
+        if (resource_limit && value) {
+            success = resource_limit->Reserve(resource, value);
+        } else {
+            success = true;
+        }
+    }
+
+    /// Timed overload taking the resource limit from `p`, which must not be null.
+    explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v, s64 t)
+        : KScopedResourceReservation(p->GetResourceLimit(), r, v, t) {}
+
+    /// Untimed overload taking the resource limit from `p`, which must not be null.
+    explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v = 1)
+        : KScopedResourceReservation(p->GetResourceLimit(), r, v) {}
+
+    // Copying would make two guards release the same reservation, so the guard is non-copyable
+    // (and, as a consequence, non-movable).
+    KScopedResourceReservation(const KScopedResourceReservation&) = delete;
+    KScopedResourceReservation& operator=(const KScopedResourceReservation&) = delete;
+
+    ~KScopedResourceReservation() noexcept {
+        if (resource_limit && value && success) {
+            // Resource was not committed, release the reservation.
+            resource_limit->Release(resource, value);
+        }
+    }
+
+    /// Commit the resource reservation, destruction of this object does not release the resource
+    void Commit() {
+        resource_limit = nullptr;
+    }
+
+    /// Returns whether the reservation made by the constructor succeeded.
+    [[nodiscard]] bool Succeeded() const {
+        return success;
+    }
+
+private:
+    std::shared_ptr<KResourceLimit> resource_limit;
+    s64 value;
+    LimitableResource resource;
+    bool success{};
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
index 140cc46a7..82f72a0fe 100644
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -40,20 +40,20 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
         // Check if the timeout is zero.
         if (timeout == 0) {
             slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
         }
 
         // Check if the thread should terminate.
         if (thread->IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Check if waiting was canceled.
         if (thread->IsWaitCancelled()) {
             slp.CancelSleep();
             thread->ClearWaitCancelled();
-            return Svc::ResultCancelled;
+            return ResultCancelled;
         }
 
         // Add the waiters.
@@ -75,7 +75,7 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
 
         // Mark the thread as waiting.
         thread->SetCancellable();
-        thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        thread->SetSyncedObject(nullptr, ResultTimedOut);
         thread->SetState(ThreadState::Waiting);
         thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization);
     }
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index b59259c4f..e5620da5a 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -18,7 +18,6 @@
 #include "core/core.h"
 #include "core/cpu_manager.h"
 #include "core/hardware_properties.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_resource_limit.h"
@@ -127,7 +126,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
 
     // Set core ID and wait result.
     core_id = phys_core;
-    wait_result = Svc::ResultNoSynchronizationObject;
+    wait_result = ResultNoSynchronizationObject;
 
     // Set priorities.
     priority = prio;
@@ -238,7 +237,7 @@ void KThread::Finalize() {
         while (it != waiter_list.end()) {
             // The thread shouldn't be a kernel waiter.
             it->SetLockOwner(nullptr);
-            it->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+            it->SetSyncedObject(nullptr, ResultInvalidState);
             it->Wakeup();
             it = waiter_list.erase(it);
         }
@@ -447,7 +446,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
         // If the core id is no-update magic, preserve the ideal core id.
         if (core_id == Svc::IdealCoreNoUpdate) {
             core_id = virtual_ideal_core_id;
-            R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, Svc::ResultInvalidCombination);
+            R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, ResultInvalidCombination);
         }
 
         // Set the virtual core/affinity mask.
@@ -526,7 +525,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
                 if (GetStackParameters().is_pinned) {
                     // Verify that the current thread isn't terminating.
                     R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                             Svc::ResultTerminationRequested);
+                             ResultTerminationRequested);
 
                     // Note that the thread was pinned.
                     thread_is_pinned = true;
@@ -604,7 +603,7 @@ void KThread::WaitCancel() {
             sleeping_queue->WakeupThread(this);
             wait_cancelled = true;
         } else {
-            SetSyncedObject(nullptr, Svc::ResultCancelled);
+            SetSyncedObject(nullptr, ResultCancelled);
             SetState(ThreadState::Runnable);
             wait_cancelled = false;
         }
@@ -663,12 +662,12 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
         // Verify our state.
         const auto cur_state = GetState();
         R_UNLESS((cur_state == ThreadState::Waiting || cur_state == ThreadState::Runnable),
-                 Svc::ResultInvalidState);
+                 ResultInvalidState);
 
         // Either pause or resume.
         if (activity == Svc::ThreadActivity::Paused) {
             // Verify that we're not suspended.
-            R_UNLESS(!IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+            R_UNLESS(!IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
 
             // Suspend.
             RequestSuspend(SuspendType::Thread);
@@ -676,7 +675,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
             ASSERT(activity == Svc::ThreadActivity::Runnable);
 
             // Verify that we're suspended.
-            R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+            R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
 
             // Resume.
             Resume(SuspendType::Thread);
@@ -698,7 +697,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
             if (GetStackParameters().is_pinned) {
                 // Verify that the current thread isn't terminating.
                 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                         Svc::ResultTerminationRequested);
+                         ResultTerminationRequested);
 
                 // Note that the thread was pinned and not current.
                 thread_is_pinned = true;
@@ -745,7 +744,7 @@ ResultCode KThread::GetThreadContext3(std::vector<u8>& out) {
         KScopedSchedulerLock sl{kernel};
 
         // Verify that we're suspended.
-        R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+        R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
 
         // If we're not terminating, get the thread's user context.
         if (!IsTerminationRequested()) {
@@ -905,12 +904,11 @@ ResultCode KThread::Run() {
         KScopedSchedulerLock lk{kernel};
 
         // If either this thread or the current thread are requesting termination, note it.
-        R_UNLESS(!IsTerminationRequested(), Svc::ResultTerminationRequested);
-        R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                 Svc::ResultTerminationRequested);
+        R_UNLESS(!IsTerminationRequested(), ResultTerminationRequested);
+        R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), ResultTerminationRequested);
 
         // Ensure our thread state is correct.
-        R_UNLESS(GetState() == ThreadState::Initialized, Svc::ResultInvalidState);
+        R_UNLESS(GetState() == ThreadState::Initialized, ResultInvalidState);
 
         // If the current thread has been asked to suspend, suspend it and retry.
         if (GetCurrentThread(kernel).IsSuspended()) {
@@ -962,7 +960,7 @@ ResultCode KThread::Sleep(s64 timeout) {
         // Check if the thread should terminate.
         if (IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Mark the thread as waiting.
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index b20c2d13a..b6e6f115e 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -26,7 +26,6 @@
 #include "core/device_memory.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
@@ -39,6 +38,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/service_thread.h"
 #include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -141,11 +141,17 @@ struct KernelCore::Impl {
         ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Events, 700).IsSuccess());
         ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemory, 200)
                    .IsSuccess());
-        ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 900).IsSuccess());
+        ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 933).IsSuccess());
 
-        if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, 0x60000)) {
+        // Derived from recent software updates. The kernel reserves 27MB
+        constexpr u64 kernel_size{0x1b00000};
+        if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, kernel_size)) {
             UNREACHABLE();
         }
+        // Reserve secure applet memory, introduced in firmware 5.0.0
+        constexpr u64 secure_applet_memory_size{0x400000};
+        ASSERT(system_resource_limit->Reserve(LimitableResource::PhysicalMemory,
+                                              secure_applet_memory_size));
     }
 
     void InitializePreemption(KernelCore& kernel) {
@@ -302,8 +308,11 @@ struct KernelCore::Impl {
         // Allocate slab heaps
         user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>();
 
+        constexpr u64 user_slab_heap_size{0x1ef000};
+        // Reserve slab heaps
+        ASSERT(
+            system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
         // Initialize slab heaps
-        constexpr u64 user_slab_heap_size{0x3de000};
         user_slab_heap_pages->Initialize(
             system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
             user_slab_heap_size);
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp
index acf13585c..77f135cdc 100644
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -8,9 +8,9 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/scope_exit.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/memory/memory_manager.h"
 #include "core/hle/kernel/memory/page_linked_list.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel::Memory {
 
@@ -95,7 +95,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
     // Choose a heap based on our page size request
     const s32 heap_index{PageHeap::GetBlockIndex(num_pages)};
     if (heap_index < 0) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     // TODO (bunnei): Support multiple managers
@@ -140,7 +140,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
 
     // Only succeed if we allocated as many pages as we wanted
     if (num_pages) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     // We succeeded!
diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp
index 7de91c768..00ed9b881 100644
--- a/src/core/hle/kernel/memory/page_table.cpp
+++ b/src/core/hle/kernel/memory/page_table.cpp
@@ -6,8 +6,7 @@
 #include "common/assert.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/address_space_info.h"
 #include "core/hle/kernel/memory/memory_block.h"
@@ -16,6 +15,7 @@
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/memory/system_control.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/memory.h"
 
 namespace Kernel::Memory {
@@ -141,7 +141,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
         (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)};
     if (alloc_size < needed_size) {
         UNREACHABLE();
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     const std::size_t remaining_size{alloc_size - needed_size};
@@ -277,11 +277,11 @@ ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemorySt
     const u64 size{num_pages * PageSize};
 
     if (!CanContain(addr, size, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (IsRegionMapped(addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     PageLinkedList page_linked_list;
@@ -307,7 +307,7 @@ ResultCode PageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::
                                   MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
 
     if (IsRegionMapped(dst_addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     PageLinkedList page_linked_list;
@@ -409,27 +409,25 @@ ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
         return RESULT_SUCCESS;
     }
 
-    auto process{system.Kernel().CurrentProcess()};
     const std::size_t remaining_size{size - mapped_size};
     const std::size_t remaining_pages{remaining_size / PageSize};
 
-    if (process->GetResourceLimit() &&
-        !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, remaining_size)) {
-        return ERR_RESOURCE_LIMIT_EXCEEDED;
+    // Reserve the memory from the process resource limit.
+    KScopedResourceReservation memory_reservation(
+        system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+        remaining_size);
+    if (!memory_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve remaining {:X} bytes", remaining_size);
+        return ResultResourceLimitedExceeded;
     }
 
     PageLinkedList page_linked_list;
-    {
-        auto block_guard = detail::ScopeExit([&] {
-            system.Kernel().MemoryManager().Free(page_linked_list, remaining_pages, memory_pool);
-            process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, remaining_size);
-        });
 
-        CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages,
-                                                              memory_pool));
+    CASCADE_CODE(
+        system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, memory_pool));
 
-        block_guard.Cancel();
-    }
+    // We succeeded, so commit the memory reservation.
+    memory_reservation.Commit();
 
     MapPhysicalMemory(page_linked_list, addr, end_addr);
 
@@ -454,12 +452,12 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
     block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) {
         if (info.state == MemoryState::Normal) {
             if (info.attribute != MemoryAttribute::None) {
-                result = ERR_INVALID_ADDRESS_STATE;
+                result = ResultInvalidCurrentMemory;
                 return;
             }
             mapped_size += GetSizeInRange(info, addr, end_addr);
         } else if (info.state != MemoryState::Free) {
-            result = ERR_INVALID_ADDRESS_STATE;
+            result = ResultInvalidCurrentMemory;
         }
     });
 
@@ -526,7 +524,7 @@ ResultCode PageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
         MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
 
     if (IsRegionMapped(dst_addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     PageLinkedList page_linked_list;
@@ -577,7 +575,7 @@ ResultCode PageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     AddRegionToPages(dst_addr, num_pages, dst_pages);
 
     if (!dst_pages.IsEqual(src_pages)) {
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     {
@@ -626,11 +624,11 @@ ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, Mem
     const std::size_t size{num_pages * PageSize};
 
     if (!CanContain(addr, size, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (IsRegionMapped(addr, num_pages * PageSize)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     CASCADE_CODE(MapPages(addr, page_linked_list, perm));
@@ -768,7 +766,7 @@ ResultCode PageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
 ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
 
     if (size > heap_region_end - heap_region_start) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     const u64 previous_heap_size{GetHeapSize()};
@@ -781,10 +779,14 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
 
         const u64 delta{size - previous_heap_size};
 
-        auto process{system.Kernel().CurrentProcess()};
-        if (process->GetResourceLimit() && delta != 0 &&
-            !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, delta)) {
-            return ERR_RESOURCE_LIMIT_EXCEEDED;
+        // Reserve memory for the heap extension.
+        KScopedResourceReservation memory_reservation(
+            system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+            delta);
+
+        if (!memory_reservation.Succeeded()) {
+            LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta);
+            return ResultResourceLimitedExceeded;
         }
 
         PageLinkedList page_linked_list;
@@ -794,12 +796,15 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
             system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));
 
         if (IsRegionMapped(current_heap_addr, delta)) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
         }
 
         CASCADE_CODE(
             Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup));
 
+        // Succeeded in allocation, commit the resource reservation
+        memory_reservation.Commit();
+
         block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal,
                               MemoryPermission::ReadAndWrite);
 
@@ -816,17 +821,17 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s
     std::lock_guard lock{page_table_lock};
 
     if (!CanContain(region_start, region_num_pages * PageSize, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (region_num_pages <= needed_num_pages) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     const VAddr addr{
         AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)};
     if (!addr) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     if (is_map_only) {
@@ -1105,13 +1110,13 @@ constexpr ResultCode PageTable::CheckMemoryState(const MemoryInfo& info, MemoryS
                                                  MemoryAttribute attr) const {
     // Validate the states match expectation
     if ((info.state & state_mask) != state) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
     if ((info.perm & perm_mask) != perm) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
     if ((info.attribute & attr_mask) != attr) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     return RESULT_SUCCESS;
@@ -1138,14 +1143,14 @@ ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission*
     while (true) {
         // Validate the current block
         if (!(info.state == first_state)) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
         }
         if (!(info.perm == first_perm)) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
         }
         if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) ==
               (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
         }
 
         // Validate against the provided masks
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 2286b292d..47b3ac57b 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -14,9 +14,9 @@
 #include "core/device_memory.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/code_set.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/memory_block_manager.h"
@@ -39,6 +39,7 @@ namespace {
  */
 void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {
     const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart();
+    ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1));
     auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0,
                                       owner_process.GetIdealCoreId(), stack_top, &owner_process);
 
@@ -117,6 +118,9 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name,
 
     std::shared_ptr<Process> process = std::make_shared<Process>(system);
     process->name = std::move(name);
+
+    // TODO: This is inaccurate
+    // The process should hold a reference to the kernel-wide resource limit.
     process->resource_limit = std::make_shared<KResourceLimit>(kernel, system);
     process->status = ProcessStatus::Created;
     process->program_id = 0;
@@ -155,6 +159,9 @@ void Process::DecrementThreadCount() {
 }
 
 u64 Process::GetTotalPhysicalMemoryAvailable() const {
+    // TODO: This is expected to always return the application memory pool size after accurately
+    // reserving kernel resources. The current workaround uses a process-local resource limit of
+    // application memory pool size, which is inaccurate.
     const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) +
                        page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
                        main_thread_stack_size};
@@ -248,8 +255,8 @@ ResultCode Process::Reset() {
     KScopedSchedulerLock sl{kernel};
 
     // Validate that we're in a state that we can reset.
-    R_UNLESS(status != ProcessStatus::Exited, Svc::ResultInvalidState);
-    R_UNLESS(is_signaled, Svc::ResultInvalidState);
+    R_UNLESS(status != ProcessStatus::Exited, ResultInvalidState);
+    R_UNLESS(is_signaled, ResultInvalidState);
 
     // Clear signaled.
     is_signaled = false;
@@ -264,6 +271,17 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
     system_resource_size = metadata.GetSystemResourceSize();
     image_size = code_size;
 
+    // Set initial resource limits
+    resource_limit->SetLimitValue(
+        LimitableResource::PhysicalMemory,
+        kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
+    KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
+                                                  code_size + system_resource_size);
+    if (!memory_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve process memory requirements of size {:X} bytes",
+                  code_size + system_resource_size);
+        return ResultResourceLimitedExceeded;
+    }
     // Initialize proces address space
     if (const ResultCode result{
             page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000,
@@ -305,24 +323,22 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
         UNREACHABLE();
     }
 
-    // Set initial resource limits
-    resource_limit->SetLimitValue(
-        LimitableResource::PhysicalMemory,
-        kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
     resource_limit->SetLimitValue(LimitableResource::Threads, 608);
     resource_limit->SetLimitValue(LimitableResource::Events, 700);
     resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128);
     resource_limit->SetLimitValue(LimitableResource::Sessions, 894);
-    ASSERT(resource_limit->Reserve(LimitableResource::PhysicalMemory, code_size));
 
     // Create TLS region
     tls_region_address = CreateTLSRegion();
+    memory_reservation.Commit();
 
     return handle_table.SetSize(capabilities.GetHandleTableSize());
 }
 
 void Process::Run(s32 main_thread_priority, u64 stack_size) {
     AllocateMainThreadStack(stack_size);
+    resource_limit->Reserve(LimitableResource::Threads, 1);
+    resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
 
     const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size};
     ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError());
@@ -330,8 +346,6 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {
     ChangeStatus(ProcessStatus::Running);
 
     SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);
-    resource_limit->Reserve(LimitableResource::Threads, 1);
-    resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
 }
 
 void Process::PrepareForTermination() {
@@ -358,6 +372,11 @@ void Process::PrepareForTermination() {
     FreeTLSRegion(tls_region_address);
     tls_region_address = 0;
 
+    if (resource_limit) {
+        resource_limit->Release(LimitableResource::PhysicalMemory,
+                                main_thread_stack_size + image_size);
+    }
+
     ChangeStatus(ProcessStatus::Exited);
 }
 
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 0566311b6..7c567049e 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -6,10 +6,10 @@
 
 #include "common/bit_util.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/process_capability.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel {
 namespace {
@@ -123,13 +123,13 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities,
             // If there's only one, then there's a problem.
             if (i >= num_capabilities) {
                 LOG_ERROR(Kernel, "Invalid combination! i={}", i);
-                return ERR_INVALID_COMBINATION;
+                return ResultInvalidCombination;
             }
 
             const auto size_flags = capabilities[i];
             if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) {
                 LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags);
-                return ERR_INVALID_COMBINATION;
+                return ResultInvalidCombination;
             }
 
             const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table);
@@ -159,7 +159,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
     const auto type = GetCapabilityType(flag);
 
     if (type == CapabilityType::Unset) {
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
     }
 
     // Bail early on ignorable entries, as one would expect,
@@ -176,7 +176,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
         LOG_ERROR(Kernel,
                   "Attempted to initialize flags that may only be initialized once. set_flags={}",
                   set_flags);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
     }
     set_flags |= set_flag;
 
@@ -202,7 +202,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
     }
 
     LOG_ERROR(Kernel, "Invalid capability type! type={}", type);
-    return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+    return ResultInvalidCapabilityDescriptor;
 }
 
 void ProcessCapabilities::Clear() {
@@ -225,7 +225,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
     if (priority_mask != 0 || core_mask != 0) {
         LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}",
                   priority_mask, core_mask);
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
     }
 
     const u32 core_num_min = (flags >> 16) & 0xFF;
@@ -233,7 +233,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
     if (core_num_min > core_num_max) {
         LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}",
                   core_num_min, core_num_max);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
     }
 
     const u32 priority_min = (flags >> 10) & 0x3F;
@@ -242,13 +242,13 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
         LOG_ERROR(Kernel,
                   "Priority min is greater than priority max! priority_min={}, priority_max={}",
-                  core_num_min, priority_max);
-        return ERR_INVALID_COMBINATION;
+                  priority_min, priority_max);
+        return ResultInvalidCombination;
     }
 
     // The switch only has 4 usable cores.
     if (core_num_max >= 4) {
         LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max);
-        return ERR_INVALID_PROCESSOR_ID;
+        return ResultInvalidCoreId;
     }
 
     const auto make_mask = [](u64 min, u64 max) {
@@ -269,7 +269,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
 
     // If we've already set this svc before, bail.
     if ((set_svc_bits & svc_bit) != 0) {
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
     }
     set_svc_bits |= svc_bit;
 
@@ -283,7 +283,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
 
         if (svc_number >= svc_capabilities.size()) {
             LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number);
-            return ERR_OUT_OF_RANGE;
+            return ResultOutOfRange;
         }
 
         svc_capabilities[svc_number] = true;
@@ -321,7 +321,7 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) {
         if (interrupt >= interrupt_capabilities.size()) {
             LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}",
                       interrupt);
-            return ERR_OUT_OF_RANGE;
+            return ResultOutOfRange;
         }
 
         interrupt_capabilities[interrupt] = true;
@@ -334,7 +334,7 @@ ResultCode ProcessCapabilities::HandleProgramTypeFlags(u32 flags) {
     const u32 reserved = flags >> 17;
     if (reserved != 0) {
         LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
     }
 
     program_type = static_cast<ProgramType>((flags >> 14) & 0b111);
@@ -354,7 +354,7 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) {
         LOG_ERROR(Kernel,
                   "Kernel version is non zero or flags are too small! major_version={}, flags={}",
                   major_version, flags);
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
     }
 
     kernel_version = flags;
@@ -365,7 +365,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
     const u32 reserved = flags >> 26;
     if (reserved != 0) {
         LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
     }
 
     handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
@@ -376,7 +376,7 @@ ResultCode ProcessCapabilities::HandleDebugFlags(u32 flags) {
     const u32 reserved = flags >> 19;
     if (reserved != 0) {
         LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
     }
 
     is_debuggable = (flags & 0x20000) != 0;
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index fe7a483c4..5d17346ad 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -5,11 +5,11 @@
 #include <tuple>
 #include "common/assert.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel {
 
@@ -18,7 +18,7 @@ ServerPort::~ServerPort() = default;
 
 ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
     if (pending_sessions.empty()) {
-        return ERR_NOT_FOUND;
+        return ResultNotFound;
     }
 
     auto session = std::move(pending_sessions.back());
diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp
index 75304b961..8830d4e91 100644
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -4,15 +4,23 @@
 
 #include "common/assert.h"
 #include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
 
 namespace Kernel {
 
 Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {}
-Session::~Session() = default;
+Session::~Session() {
+    // Release the session slot that Session::Create reserved from the system resource limit.
+    kernel.GetSystemResourceLimit()->Release(LimitableResource::Sessions, 1);
+}
 
 Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
+    // Reserve a new session from the resource limit.
+    KScopedResourceReservation session_reservation(kernel.GetSystemResourceLimit(),
+                                                   LimitableResource::Sessions);
+    ASSERT(session_reservation.Succeeded());
     auto session{std::make_shared<Session>(kernel)};
     auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()};
     auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()};
@@ -21,6 +29,7 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
     session->client = client_session;
     session->server = server_session;
 
+    session_reservation.Commit();
     return std::make_pair(std::move(client_session), std::move(server_session));
 }
 
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 0cd467110..2eadd51d7 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -4,6 +4,7 @@
 
 #include "common/assert.h"
 #include "core/core.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/shared_memory.h"
@@ -13,7 +14,9 @@ namespace Kernel {
 SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory)
     : Object{kernel}, device_memory{device_memory} {}
 
-SharedMemory::~SharedMemory() = default;
+SharedMemory::~SharedMemory() {
+    kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemory, size);
+}
 
 std::shared_ptr<SharedMemory> SharedMemory::Create(
     KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process,
@@ -21,6 +24,11 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
     Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size,
     std::string name) {
 
+    const auto resource_limit = kernel.GetSystemResourceLimit();
+    KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
+                                                  size);
+    ASSERT(memory_reservation.Succeeded());
+
     std::shared_ptr<SharedMemory> shared_memory{
         std::make_shared<SharedMemory>(kernel, device_memory)};
 
@@ -32,6 +40,7 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
     shared_memory->size = size;
     shared_memory->name = name;
 
+    memory_reservation.Commit();
     return shared_memory;
 }
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 26650a513..31d899e06 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -23,7 +23,6 @@
 #include "core/cpu_manager.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_address_arbiter.h"
 #include "core/hle/kernel/k_condition_variable.h"
@@ -31,6 +30,7 @@
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
 #include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/k_thread.h"
@@ -71,49 +71,49 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
                                       VAddr src_addr, u64 size) {
     if (!Common::Is4KBAligned(dst_addr)) {
         LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (!Common::Is4KBAligned(src_addr)) {
         LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (size == 0) {
         LOG_ERROR(Kernel_SVC, "Size is 0");
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (!Common::Is4KBAligned(size)) {
         LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (!IsValidAddressRange(dst_addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
                   dst_addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (!IsValidAddressRange(src_addr, size)) {
         LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
                   src_addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (!manager.IsInsideAddressSpace(src_addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
                   src_addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (manager.IsOutsideStackRegion(dst_addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}",
                   dst_addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     if (manager.IsInsideHeapRegion(dst_addr, size)) {
@@ -121,7 +121,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
                   "Destination does not fit within the heap region, addr=0x{:016X}, "
                   "size=0x{:016X}",
                   dst_addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     if (manager.IsInsideAliasRegion(dst_addr, size)) {
@@ -129,7 +129,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
                   "Destination does not fit within the map region, addr=0x{:016X}, "
                   "size=0x{:016X}",
                   dst_addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     return RESULT_SUCCESS;
@@ -138,6 +138,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
 enum class ResourceLimitValueType {
     CurrentValue,
     LimitValue,
+    PeakValue,
 };
 
 ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
@@ -146,7 +147,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
     const auto type = static_cast<LimitableResource>(resource_type);
     if (!IsValidResourceType(type)) {
         LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
     }
 
     const auto* const current_process = system.Kernel().CurrentProcess();
@@ -157,14 +158,20 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
     if (!resource_limit_object) {
         LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
                   resource_limit);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
-    if (value_type == ResourceLimitValueType::CurrentValue) {
+    switch (value_type) {
+    case ResourceLimitValueType::CurrentValue:
         return MakeResult(resource_limit_object->GetCurrentValue(type));
+    case ResourceLimitValueType::LimitValue:
+        return MakeResult(resource_limit_object->GetLimitValue(type));
+    case ResourceLimitValueType::PeakValue:
+        return MakeResult(resource_limit_object->GetPeakValue(type));
+    default:
+        LOG_ERROR(Kernel_SVC, "Invalid resource value_type: '{}'", value_type);
+        return ResultInvalidEnumValue;
     }
-
-    return MakeResult(resource_limit_object->GetLimitValue(type));
 }
 } // Anonymous namespace
 
@@ -177,12 +184,12 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s
     if ((heap_size % 0x200000) != 0) {
         LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}",
                   heap_size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (heap_size >= 0x200000000) {
         LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -208,19 +215,19 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
 
     if (!Common::Is4KBAligned(address)) {
         LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (size == 0 || !Common::Is4KBAligned(size)) {
         LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.",
                   size);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (!IsValidAddressRange(address, size)) {
         LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})",
                   address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)};
@@ -229,7 +236,7 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
         LOG_ERROR(Kernel_SVC,
                   "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}",
                   attribute, mask);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
     }
 
     auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -293,7 +300,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
         LOG_ERROR(Kernel_SVC,
                   "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
                   port_name_address);
-        return ERR_NOT_FOUND;
+        return ResultNotFound;
     }
 
     static constexpr std::size_t PortNameMaxLength = 11;
@@ -302,7 +309,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
     if (port_name.size() > PortNameMaxLength) {
         LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength,
                   port_name.size());
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
     }
 
     LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
@@ -311,11 +318,9 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
     const auto it = kernel.FindNamedPort(port_name);
     if (!kernel.IsValidNamedPort(it)) {
         LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
-        return ERR_NOT_FOUND;
+        return ResultNotFound;
     }
 
-    ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(LimitableResource::Sessions, 1));
-
     auto client_port = it->second;
 
     std::shared_ptr<ClientSession> client_session;
@@ -340,7 +345,7 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
     std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
     if (!session) {
         LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
@@ -405,7 +410,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
         const Process* const owner_process = thread->GetOwnerProcess();
         if (!owner_process) {
             LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered.");
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
         }
 
         *process_id = owner_process->GetProcessID();
@@ -415,7 +420,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
     // NOTE: This should also handle debug objects before returning.
 
     LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle);
-    return ERR_INVALID_HANDLE;
+    return ResultInvalidHandle;
 }
 
 static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high,
@@ -438,7 +443,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
         LOG_ERROR(Kernel_SVC,
                   "Handle address is not a valid virtual address, handle_address=0x{:016X}",
                   handles_address);
-        return ERR_INVALID_POINTER;
+        return ResultInvalidPointer;
     }
 
     static constexpr u64 MaxHandles = 0x40;
@@ -446,7 +451,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
     if (handle_count > MaxHandles) {
         LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}",
                   MaxHandles, handle_count);
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
     }
 
     auto& kernel = system.Kernel();
@@ -459,7 +464,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
 
         if (object == nullptr) {
             LOG_ERROR(Kernel_SVC, "Object is a nullptr");
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
         }
 
         objects[i] = object.get();
@@ -481,6 +486,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
     // Get the thread from its handle.
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle);
+
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle);
         return ResultInvalidHandle;
@@ -525,6 +531,7 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) {
     LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address);
 
     // Validate the input address.
+
     if (Memory::IsKernelAddress(address)) {
         LOG_ERROR(Kernel_SVC,
                   "Attempting to arbitrate an unlock on a kernel address (address={:08X})",
@@ -735,7 +742,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         if (info_sub_id != 0) {
             LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
                       info_sub_id);
-            return ERR_INVALID_ENUM_VALUE;
+            return ResultInvalidEnumValue;
         }
 
         const auto& current_process_handle_table =
@@ -744,7 +751,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         if (!process) {
             LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}",
                       info_id, info_sub_id, handle);
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
         }
 
         switch (info_id_type) {
@@ -826,7 +833,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         }
 
         LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
     }
 
     case GetInfoType::IsCurrentProcessBeingDebugged:
@@ -836,13 +843,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
     case GetInfoType::RegisterResourceLimit: {
         if (handle != 0) {
             LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle);
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
         }
 
         if (info_sub_id != 0) {
             LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
                       info_sub_id);
-            return ERR_INVALID_COMBINATION;
+            return ResultInvalidCombination;
         }
 
         Process* const current_process = system.Kernel().CurrentProcess();
@@ -867,13 +874,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         if (handle != 0) {
             LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}",
                       handle);
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
         }
 
         if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) {
             LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}",
                       Process::RANDOM_ENTROPY_SIZE, info_sub_id);
-            return ERR_INVALID_COMBINATION;
+            return ResultInvalidCombination;
         }
 
         *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id);
@@ -890,7 +897,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) {
             LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus,
                       info_sub_id);
-            return ERR_INVALID_COMBINATION;
+            return ResultInvalidCombination;
         }
 
         const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>(
@@ -898,7 +905,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         if (!thread) {
             LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}",
                       static_cast<Handle>(handle));
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
         }
 
         const auto& core_timing = system.CoreTiming();
@@ -922,7 +929,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
 
     default:
         LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
     }
 }
 
@@ -945,22 +952,22 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
 
     if (!Common::Is4KBAligned(addr)) {
         LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (!Common::Is4KBAligned(size)) {
         LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (size == 0) {
         LOG_ERROR(Kernel_SVC, "Size is zero");
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (!(addr < addr + size)) {
         LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     Process* const current_process{system.Kernel().CurrentProcess()};
@@ -968,21 +975,21 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
 
     if (current_process->GetSystemResourceSize() == 0) {
         LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
-        return ERR_INVALID_STATE;
+        return ResultInvalidState;
     }
 
     if (!page_table.IsInsideAddressSpace(addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
                   size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     if (page_table.IsOutsideAliasRegion(addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
                   size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     return page_table.MapPhysicalMemory(addr, size);
@@ -999,22 +1006,22 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
 
     if (!Common::Is4KBAligned(addr)) {
         LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (!Common::Is4KBAligned(size)) {
         LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (size == 0) {
         LOG_ERROR(Kernel_SVC, "Size is zero");
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (!(addr < addr + size)) {
         LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     Process* const current_process{system.Kernel().CurrentProcess()};
@@ -1022,21 +1029,21 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
 
     if (current_process->GetSystemResourceSize() == 0) {
         LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
-        return ERR_INVALID_STATE;
+        return ResultInvalidState;
     }
 
     if (!page_table.IsInsideAddressSpace(addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
                   size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     if (page_table.IsOutsideAliasRegion(addr, size)) {
         LOG_ERROR(Kernel_SVC,
                   "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
                   size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     return page_table.UnmapPhysicalMemory(addr, size);
@@ -1206,23 +1213,23 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
 
     if (!Common::Is4KBAligned(addr)) {
         LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (size == 0) {
         LOG_ERROR(Kernel_SVC, "Size is 0");
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (!Common::Is4KBAligned(size)) {
         LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (!IsValidAddressRange(addr, size)) {
         LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
                   addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     const auto permission_type = static_cast<Memory::MemoryPermission>(permissions);
@@ -1230,7 +1237,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
         Memory::MemoryPermission::ReadAndWrite) {
         LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}",
                   permissions);
-        return ERR_INVALID_MEMORY_PERMISSIONS;
+        return ResultInvalidMemoryPermissions;
     }
 
     auto* const current_process{system.Kernel().CurrentProcess()};
@@ -1241,7 +1248,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
                   "Addr does not fit within the valid region, addr=0x{:016X}, "
                   "size=0x{:016X}",
                   addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     if (page_table.IsInsideHeapRegion(addr, size)) {
@@ -1249,7 +1256,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
                   "Addr does not fit within the heap region, addr=0x{:016X}, "
                   "size=0x{:016X}",
                   addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     if (page_table.IsInsideAliasRegion(addr, size)) {
@@ -1257,14 +1264,14 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
                   "Address does not fit within the map region, addr=0x{:016X}, "
                   "size=0x{:016X}",
                   addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)};
     if (!shared_memory) {
         LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
                   shared_memory_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     return shared_memory->Map(*current_process, addr, size, permission_type);
@@ -1285,7 +1292,7 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add
     if (!process) {
         LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
                   process_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     auto& memory{system.Memory()};
@@ -1332,18 +1339,18 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
     if (!Common::Is4KBAligned(src_address)) {
         LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
                   src_address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (!Common::Is4KBAligned(dst_address)) {
         LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
                   dst_address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (size == 0 || !Common::Is4KBAligned(size)) {
         LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (!IsValidAddressRange(dst_address, size)) {
@@ -1351,7 +1358,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
                   "Destination address range overflows the address space (dst_address=0x{:016X}, "
                   "size=0x{:016X}).",
                   dst_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (!IsValidAddressRange(src_address, size)) {
@@ -1359,7 +1366,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
                   "Source address range overflows the address space (src_address=0x{:016X}, "
                   "size=0x{:016X}).",
                   src_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1367,7 +1374,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
     if (!process) {
         LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
                   process_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     auto& page_table = process->PageTable();
@@ -1376,7 +1383,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
                   "Source address range is not within the address space (src_address=0x{:016X}, "
                   "size=0x{:016X}).",
                   src_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1384,7 +1391,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
                   "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
                   "size=0x{:016X}).",
                   dst_address, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     return page_table.MapProcessCodeMemory(dst_address, src_address, size);
@@ -1400,18 +1407,18 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
     if (!Common::Is4KBAligned(dst_address)) {
         LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
                   dst_address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (!Common::Is4KBAligned(src_address)) {
         LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
                   src_address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
-    if (size == 0 || Common::Is4KBAligned(size)) {
+    if (size == 0 || !Common::Is4KBAligned(size)) {
         LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
     }
 
     if (!IsValidAddressRange(dst_address, size)) {
@@ -1419,7 +1426,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
                   "Destination address range overflows the address space (dst_address=0x{:016X}, "
                   "size=0x{:016X}).",
                   dst_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (!IsValidAddressRange(src_address, size)) {
@@ -1427,7 +1434,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
                   "Source address range overflows the address space (src_address=0x{:016X}, "
                   "size=0x{:016X}).",
                   src_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1435,7 +1442,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
     if (!process) {
         LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
                   process_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     auto& page_table = process->PageTable();
@@ -1444,7 +1451,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
                   "Source address range is not within the address space (src_address=0x{:016X}, "
                   "size=0x{:016X}).",
                   src_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1452,7 +1459,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
                   "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
                   "size=0x{:016X}).",
                   dst_address, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     return page_table.UnmapProcessCodeMemory(dst_address, src_address, size);
@@ -1515,8 +1522,13 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
         return ResultInvalidPriority;
     }
 
-    ASSERT(process.GetResourceLimit()->Reserve(
-        LimitableResource::Threads, 1, system.CoreTiming().GetGlobalTimeNs().count() + 100000000));
+    KScopedResourceReservation thread_reservation(
+        kernel.CurrentProcess(), LimitableResource::Threads, 1,
+        system.CoreTiming().GetGlobalTimeNs().count() + 100000000);
+    if (!thread_reservation.Succeeded()) {
+        LOG_ERROR(Kernel_SVC, "Could not reserve a new thread");
+        return ResultResourceLimitedExceeded;
+    }
 
     std::shared_ptr<KThread> thread;
     {
@@ -1536,6 +1548,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
     // Set the thread name for debugging purposes.
     thread->SetName(
         fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
+    thread_reservation.Commit();
 
     return RESULT_SUCCESS;
 }
@@ -1844,7 +1857,7 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {
 
     LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle);
 
-    return Svc::ResultInvalidHandle;
+    return ResultInvalidHandle;
 }
 
 static ResultCode ResetSignal32(Core::System& system, Handle handle) {
@@ -1860,18 +1873,18 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
 
     if (!Common::Is4KBAligned(addr)) {
         LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (!Common::Is4KBAligned(size) || size == 0) {
         LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
     }
 
     if (!IsValidAddressRange(addr, size)) {
         LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})",
                   addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     const auto perms{static_cast<Memory::MemoryPermission>(permissions)};
@@ -1879,10 +1892,17 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
         perms == Memory::MemoryPermission::Write) {
         LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})",
                   permissions);
-        return ERR_INVALID_MEMORY_PERMISSIONS;
+        return ResultInvalidMemoryPermissions;
     }
 
     auto& kernel = system.Kernel();
+    // Reserve a new transfer memory from the process resource limit.
+    KScopedResourceReservation trmem_reservation(kernel.CurrentProcess(),
+                                                 LimitableResource::TransferMemory);
+    if (!trmem_reservation.Succeeded()) {
+        LOG_ERROR(Kernel_SVC, "Could not reserve a new transfer memory");
+        return ResultResourceLimitedExceeded;
+    }
     auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms);
 
     if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) {
@@ -1894,6 +1914,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
     if (result.Failed()) {
         return result.Code();
     }
+    trmem_reservation.Commit();
 
     *handle = *result;
     return RESULT_SUCCESS;
@@ -1989,7 +2010,6 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
         LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw);
         return set_result;
     }
-
     return RESULT_SUCCESS;
 }
 
@@ -2002,8 +2022,17 @@ static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle
 static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
     LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle);
 
+    auto& kernel = system.Kernel();
     // Get the current handle table.
-    const HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    const HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
+
+    // Reserve a new event from the process resource limit.
+    KScopedResourceReservation event_reservation(kernel.CurrentProcess(),
+                                                 LimitableResource::Events);
+    if (!event_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve a new event");
+        return ResultResourceLimitedExceeded;
+    }
 
     // Get the writable event.
     auto writable_event = handle_table.Get<KWritableEvent>(event_handle);
@@ -2012,6 +2041,9 @@ static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
         return ResultInvalidHandle;
     }
 
+    // Commit the successful reservation.
+    event_reservation.Commit();
+
     return writable_event->Signal();
 }
 
@@ -2043,7 +2075,7 @@ static ResultCode ClearEvent(Core::System& system, Handle event_handle) {
 
     LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle);
 
-    return Svc::ResultInvalidHandle;
+    return ResultInvalidHandle;
 }
 
 static ResultCode ClearEvent32(Core::System& system, Handle event_handle) {
@@ -2106,13 +2138,13 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_
     if (!process) {
         LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
                   process_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     const auto info_type = static_cast<InfoType>(type);
     if (info_type != InfoType::Status) {
         LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
     }
 
     *out = static_cast<u64>(process->GetStatus());
@@ -2174,7 +2206,7 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
     const auto type = static_cast<LimitableResource>(resource_type);
     if (!IsValidResourceType(type)) {
         LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
     }
 
     auto* const current_process = system.Kernel().CurrentProcess();
@@ -2185,16 +2217,16 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
     if (!resource_limit_object) {
         LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
                   resource_limit);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value));
     if (set_result.IsError()) {
-        LOG_ERROR(
-            Kernel_SVC,
-            "Attempted to lower resource limit ({}) for category '{}' below its current value ({})",
-            resource_limit_object->GetLimitValue(type), resource_type,
-            resource_limit_object->GetCurrentValue(type));
+        LOG_ERROR(Kernel_SVC,
+                  "Attempted to lower resource limit ({}) for category '{}' below its current "
+                  "value ({})",
+                  resource_limit_object->GetLimitValue(type), resource_type,
+                  resource_limit_object->GetCurrentValue(type));
         return set_result;
     }
 
@@ -2211,7 +2243,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
         LOG_ERROR(Kernel_SVC,
                   "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
                   out_process_ids_size);
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
     }
 
     const auto& kernel = system.Kernel();
@@ -2221,7 +2253,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
                                         out_process_ids, total_copy_size)) {
         LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
                   out_process_ids, out_process_ids + total_copy_size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     auto& memory = system.Memory();
@@ -2250,7 +2282,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
     if ((out_thread_ids_size & 0xF0000000) != 0) {
         LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
                   out_thread_ids_size);
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
     }
 
     const auto* const current_process = system.Kernel().CurrentProcess();
@@ -2260,7 +2292,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
         !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) {
         LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
                   out_thread_ids, out_thread_ids + total_copy_size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     auto& memory = system.Memory();
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h
index 204cd989d..a26d9f2c9 100644
--- a/src/core/hle/kernel/svc_results.h
+++ b/src/core/hle/kernel/svc_results.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2018 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
@@ -6,21 +6,36 @@
 
 #include "core/hle/result.h"
 
-namespace Kernel::Svc {
+namespace Kernel {
 
+// Confirmed Switch kernel error codes
+
+constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7};
+constexpr ResultCode ResultInvalidCapabilityDescriptor{ErrorModule::Kernel, 14};
 constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57};
 constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
+constexpr ResultCode ResultInvalidSize{ErrorModule::Kernel, 101};
 constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102};
 constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103};
+constexpr ResultCode ResultOutOfMemory{ErrorModule::Kernel, 104};
+constexpr ResultCode ResultHandleTableFull{ErrorModule::Kernel, 105};
 constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
+constexpr ResultCode ResultInvalidMemoryPermissions{ErrorModule::Kernel, 108};
+constexpr ResultCode ResultInvalidMemoryRange{ErrorModule::Kernel, 110};
 constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112};
 constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113};
 constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
+constexpr ResultCode ResultInvalidPointer{ErrorModule::Kernel, 115};
 constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116};
 constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
 constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
+constexpr ResultCode ResultOutOfRange{ErrorModule::Kernel, 119};
 constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120};
+constexpr ResultCode ResultNotFound{ErrorModule::Kernel, 121};
 constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122};
+constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123};
 constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};
+constexpr ResultCode ResultReservedValue{ErrorModule::Kernel, 126};
+constexpr ResultCode ResultResourceLimitedExceeded{ErrorModule::Kernel, 132};
 
-} // namespace Kernel::Svc
+} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 765f408c3..6b0fc1591 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/process.h"
@@ -17,6 +18,7 @@ TransferMemory::TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory)
 TransferMemory::~TransferMemory() {
     // Release memory region when transfer memory is destroyed
     Reset();
+    owner_process->GetResourceLimit()->Release(LimitableResource::TransferMemory, 1);
 }
 
 std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel,
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index bb77c2569..8e1fe9438 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1047,20 +1047,21 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
 
     const u64 offset{rp.Pop<u64>()};
     const std::vector<u8> data{ctx.ReadBuffer()};
+    const std::size_t size{std::min(data.size(), backing.GetSize() - offset)};
 
-    LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size());
+    LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
 
-    if (data.size() > backing.GetSize() - offset) {
+    if (offset > backing.GetSize()) {
         LOG_ERROR(Service_AM,
                   "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}",
-                  backing.GetSize(), data.size(), offset);
+                  backing.GetSize(), size, offset);
 
         IPC::ResponseBuilder rb{ctx, 2};
         rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
         return;
     }
 
-    std::memcpy(backing.GetData().data() + offset, data.data(), data.size());
+    std::memcpy(backing.GetData().data() + offset, data.data(), size);
 
     IPC::ResponseBuilder rb{ctx, 2};
     rb.Push(RESULT_SUCCESS);
@@ -1070,11 +1071,11 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
 
     const u64 offset{rp.Pop<u64>()};
-    const std::size_t size{ctx.GetWriteBufferSize()};
+    const std::size_t size{std::min(ctx.GetWriteBufferSize(), backing.GetSize() - offset)};
 
     LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
 
-    if (size > backing.GetSize() - offset) {
+    if (offset > backing.GetSize()) {
         LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
                   backing.GetSize(), size, offset);
 
diff --git a/src/core/hle/service/am/applets/controller.cpp b/src/core/hle/service/am/applets/controller.cpp
index d7d3ee99a..c2bfe698f 100644
--- a/src/core/hle/service/am/applets/controller.cpp
+++ b/src/core/hle/service/am/applets/controller.cpp
@@ -211,7 +211,8 @@ void Controller::Execute() {
     case ControllerSupportMode::ShowControllerFirmwareUpdate:
         UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented",
                           controller_private_arg.mode);
-        [[fallthrough]];
+        ConfigurationComplete();
+        break;
     default: {
         ConfigurationComplete();
         break;
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index 3022438b1..79b209c6b 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -121,6 +121,10 @@ void SoftwareKeyboard::ExecuteInteractive() {
         std::memcpy(&request, data.data(), sizeof(Request));
 
         switch (request) {
+        case Request::Finalize:
+            complete = true;
+            broker.SignalStateChanged();
+            break;
         case Request::Calc: {
             broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{1}));
             broker.SignalStateChanged();
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index dbf198345..70b9f3824 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -21,6 +21,7 @@
 
 namespace Service::HID {
 constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
+constexpr s32 HID_TRIGGER_MAX = 0x7fff;
 [[maybe_unused]] constexpr s32 HID_JOYSTICK_MIN = -0x7fff;
 constexpr std::size_t NPAD_OFFSET = 0x9A00;
 constexpr u32 BATTERY_FULL = 2;
@@ -48,6 +49,8 @@ Controller_NPad::NPadControllerType Controller_NPad::MapSettingsTypeToNPad(
         return NPadControllerType::JoyRight;
     case Settings::ControllerType::Handheld:
         return NPadControllerType::Handheld;
+    case Settings::ControllerType::GameCube:
+        return NPadControllerType::GameCube;
     default:
         UNREACHABLE();
         return NPadControllerType::ProController;
@@ -67,6 +70,8 @@ Settings::ControllerType Controller_NPad::MapNPadToSettingsType(
         return Settings::ControllerType::RightJoycon;
     case NPadControllerType::Handheld:
         return Settings::ControllerType::Handheld;
+    case NPadControllerType::GameCube:
+        return Settings::ControllerType::GameCube;
     default:
         UNREACHABLE();
         return Settings::ControllerType::ProController;
@@ -209,6 +214,13 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) {
         controller.assignment_mode = NpadAssignments::Single;
         controller.footer_type = AppletFooterUiType::JoyRightHorizontal;
         break;
+    case NPadControllerType::GameCube:
+        controller.style_set.gamecube.Assign(1);
+        // The GC Controller behaves like a wired Pro Controller
+        controller.device_type.fullkey.Assign(1);
+        controller.system_properties.is_vertical.Assign(1);
+        controller.system_properties.use_plus.Assign(1);
+        break;
     case NPadControllerType::Pokeball:
         controller.style_set.palma.Assign(1);
         controller.device_type.palma.Assign(1);
@@ -259,6 +271,7 @@ void Controller_NPad::OnInit() {
         style.joycon_right.Assign(1);
         style.joycon_dual.Assign(1);
         style.fullkey.Assign(1);
+        style.gamecube.Assign(1);
         style.palma.Assign(1);
     }
 
@@ -339,6 +352,7 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
     auto& pad_state = npad_pad_states[controller_idx].pad_states;
     auto& lstick_entry = npad_pad_states[controller_idx].l_stick;
     auto& rstick_entry = npad_pad_states[controller_idx].r_stick;
+    auto& trigger_entry = npad_trigger_states[controller_idx];
     const auto& button_state = buttons[controller_idx];
     const auto& analog_state = sticks[controller_idx];
     const auto [stick_l_x_f, stick_l_y_f] =
@@ -404,6 +418,17 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
         pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus());
         pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus());
     }
+
+    if (controller_type == NPadControllerType::GameCube) {
+        trigger_entry.l_analog = static_cast<s32>(
+            button_state[ZL - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0);
+        trigger_entry.r_analog = static_cast<s32>(
+            button_state[ZR - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0);
+        pad_state.zl.Assign(false);
+        pad_state.zr.Assign(button_state[R - BUTTON_HID_BEGIN]->GetStatus());
+        pad_state.l.Assign(button_state[ZL - BUTTON_HID_BEGIN]->GetStatus());
+        pad_state.r.Assign(button_state[ZR - BUTTON_HID_BEGIN]->GetStatus());
+    }
 }
 
 void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
@@ -418,6 +443,11 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
             &npad.joy_left_states,  &npad.joy_right_states, &npad.palma_states,
             &npad.system_ext_states};
 
+        // It is possible to have more controllers with analog triggers
+        const std::array<TriggerGeneric*, 1> controller_triggers{
+            &npad.gc_trigger_states,
+        };
+
         for (auto* main_controller : controller_npads) {
             main_controller->common.entry_count = 16;
             main_controller->common.total_entry_count = 17;
@@ -435,6 +465,21 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
             cur_entry.timestamp2 = cur_entry.timestamp;
         }
 
+        for (auto* analog_trigger : controller_triggers) {
+            analog_trigger->entry_count = 16;
+            analog_trigger->total_entry_count = 17;
+
+            const auto& last_entry = analog_trigger->trigger[analog_trigger->last_entry_index];
+
+            analog_trigger->timestamp = core_timing.GetCPUTicks();
+            analog_trigger->last_entry_index = (analog_trigger->last_entry_index + 1) % 17;
+
+            auto& cur_entry = analog_trigger->trigger[analog_trigger->last_entry_index];
+
+            cur_entry.timestamp = last_entry.timestamp + 1;
+            cur_entry.timestamp2 = cur_entry.timestamp;
+        }
+
         const auto& controller_type = connected_controllers[i].type;
 
         if (controller_type == NPadControllerType::None || !connected_controllers[i].is_connected) {
@@ -444,6 +489,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
 
         RequestPadStateUpdate(npad_index);
         auto& pad_state = npad_pad_states[npad_index];
+        auto& trigger_state = npad_trigger_states[npad_index];
 
         auto& main_controller =
             npad.fullkey_states.npad[npad.fullkey_states.common.last_entry_index];
@@ -456,6 +502,8 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
         auto& pokeball_entry = npad.palma_states.npad[npad.palma_states.common.last_entry_index];
         auto& libnx_entry =
             npad.system_ext_states.npad[npad.system_ext_states.common.last_entry_index];
+        auto& trigger_entry =
+            npad.gc_trigger_states.trigger[npad.gc_trigger_states.last_entry_index];
 
         libnx_entry.connection_status.raw = 0;
         libnx_entry.connection_status.is_connected.Assign(1);
@@ -524,6 +572,18 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
 
             libnx_entry.connection_status.is_right_connected.Assign(1);
             break;
+        case NPadControllerType::GameCube:
+            main_controller.connection_status.raw = 0;
+            main_controller.connection_status.is_connected.Assign(1);
+            main_controller.connection_status.is_wired.Assign(1);
+            main_controller.pad.pad_states.raw = pad_state.pad_states.raw;
+            main_controller.pad.l_stick = pad_state.l_stick;
+            main_controller.pad.r_stick = pad_state.r_stick;
+            trigger_entry.l_analog = trigger_state.l_analog;
+            trigger_entry.r_analog = trigger_state.r_analog;
+
+            libnx_entry.connection_status.is_wired.Assign(1);
+            break;
         case NPadControllerType::Pokeball:
             pokeball_entry.connection_status.raw = 0;
             pokeball_entry.connection_status.is_connected.Assign(1);
@@ -674,6 +734,7 @@ void Controller_NPad::OnMotionUpdate(const Core::Timing::CoreTiming& core_timing
                 right_sixaxis_entry.orientation = motion_devices[1].orientation;
             }
             break;
+        case NPadControllerType::GameCube:
         case NPadControllerType::Pokeball:
             break;
         }
@@ -1135,6 +1196,8 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const
             return style.joycon_left;
         case NPadControllerType::JoyRight:
             return style.joycon_right;
+        case NPadControllerType::GameCube:
+            return style.gamecube;
         case NPadControllerType::Pokeball:
             return style.palma;
         default:
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 48bab988c..bc2e6779d 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -51,6 +51,7 @@ public:
         JoyDual,
         JoyLeft,
         JoyRight,
+        GameCube,
         Pokeball,
     };
 
@@ -60,6 +61,7 @@ public:
         JoyconDual = 5,
         JoyconLeft = 6,
         JoyconRight = 7,
+        GameCube = 8,
         Pokeball = 9,
         MaxNpadType = 10,
     };
@@ -389,6 +391,25 @@ private:
     };
     static_assert(sizeof(SixAxisGeneric) == 0x708, "SixAxisGeneric is an invalid size");
 
+    struct TriggerState {
+        s64_le timestamp{};
+        s64_le timestamp2{};
+        s32_le l_analog{};
+        s32_le r_analog{};
+    };
+    static_assert(sizeof(TriggerState) == 0x18, "TriggerState is an invalid size");
+
+    struct TriggerGeneric {
+        INSERT_PADDING_BYTES(0x4);
+        s64_le timestamp;
+        INSERT_PADDING_BYTES(0x4);
+        s64_le total_entry_count;
+        s64_le last_entry_index;
+        s64_le entry_count;
+        std::array<TriggerState, 17> trigger{};
+    };
+    static_assert(sizeof(TriggerGeneric) == 0x1C8, "TriggerGeneric is an invalid size");
+
     struct NPadSystemProperties {
         union {
             s64_le raw{};
@@ -509,7 +530,9 @@ private:
         AppletFooterUiType footer_type;
         // nfc_states needs to be checked switchbrew does not match with HW
         NfcXcdHandle nfc_states;
-        INSERT_PADDING_BYTES(0xdef);
+        INSERT_PADDING_BYTES(0x8); // Mutex
+        TriggerGeneric gc_trigger_states;
+        INSERT_PADDING_BYTES(0xc1f);
     };
     static_assert(sizeof(NPadEntry) == 0x5000, "NPadEntry is an invalid size");
 
@@ -560,6 +583,7 @@ private:
     f32 sixaxis_fusion_parameter2{};
     bool sixaxis_at_rest{true};
     std::array<ControllerPad, 10> npad_pad_states{};
+    std::array<TriggerState, 10> npad_trigger_states{};
     bool is_in_lr_assignment_mode{false};
     Core::System& system;
 };
diff --git a/src/core/hle/service/ldn/errors.h b/src/core/hle/service/ldn/errors.h
new file mode 100644
index 000000000..a718c5c66
--- /dev/null
+++ b/src/core/hle/service/ldn/errors.h
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Service::LDN {
+
+constexpr ResultCode ERROR_DISABLED{ErrorModule::LDN, 22};
+
+} // namespace Service::LDN
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index ee908f399..c630d93cd 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -6,6 +6,7 @@
 
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/result.h"
+#include "core/hle/service/ldn/errors.h"
 #include "core/hle/service/ldn/ldn.h"
 #include "core/hle/service/sm/sm.h"
 
@@ -103,7 +104,7 @@ public:
         : ServiceFramework{system_, "IUserLocalCommunicationService"} {
         // clang-format off
         static const FunctionInfo functions[] = {
-            {0, nullptr, "GetState"},
+            {0, &IUserLocalCommunicationService::GetState, "GetState"},
             {1, nullptr, "GetNetworkInfo"},
             {2, nullptr, "GetIpv4Address"},
             {3, nullptr, "GetDisconnectReason"},
@@ -138,13 +139,38 @@ public:
         RegisterHandlers(functions);
     }
 
-    void Initialize2(Kernel::HLERequestContext& ctx) {
+    void GetState(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_LDN, "(STUBBED) called");
-        // Result success seem make this services start network and continue.
-        // If we just pass result error then it will stop and maybe try again and again.
+
+        IPC::ResponseBuilder rb{ctx, 3};
+        // The result code has to be pushed first; the state value follows it in the payload
+        rb.Push(RESULT_SUCCESS);
+
+        // Indicate a network error, as we do not actually emulate LDN
+        rb.Push(static_cast<u32>(State::Error));
+    }
+
+    void Initialize2(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_LDN, "called");
+
+        is_initialized = true;
+
         IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_UNKNOWN);
+        rb.Push(RESULT_SUCCESS);
     }
+
+private:
+    enum class State {
+        None,
+        Initialized,
+        AccessPointOpened,
+        AccessPointCreated,
+        StationOpened,
+        StationConnected,
+        Error, // Implicitly 6; GetState casts it to u32 for the response
+    };
+
+    bool is_initialized{};
 };
 
 class LDNS final : public ServiceFramework<LDNS> {
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9da786b4e..c724d2554 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -11,10 +11,10 @@
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/memory/system_control.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/service/ldr/ldr.h"
 #include "core/hle/service/service.h"
 #include "core/loader/nro.h"
@@ -330,7 +330,7 @@ public:
             const VAddr addr{GetRandomMapRegion(page_table, size)};
             const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)};
 
-            if (result == Kernel::ERR_INVALID_ADDRESS_STATE) {
+            if (result == Kernel::ResultInvalidCurrentMemory) {
                 continue;
             }
 
@@ -361,7 +361,7 @@ public:
                 const ResultCode result{
                     page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)};
 
-                if (result == Kernel::ERR_INVALID_ADDRESS_STATE) {
+                if (result == Kernel::ResultInvalidCurrentMemory) {
                     continue;
                 }
 
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 5d6d25696..2d1d4d67f 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -215,7 +215,7 @@ private:
         const auto& amiibo = nfp_interface.GetAmiiboBuffer();
         const TagInfo tag_info{
             .uuid = amiibo.uuid,
-            .uuid_length = static_cast<u8>(tag_info.uuid.size()),
+            .uuid_length = static_cast<u8>(amiibo.uuid.size()),
             .padding_1 = {},
             .protocol = 1, // TODO(ogniK): Figure out actual values
             .tag_type = 2,
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index 0b306b87a..78e9cd708 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -453,7 +453,8 @@ std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protoco
         return {-1, Errno::MFILE};
     }
 
-    FileDescriptor& descriptor = file_descriptors[fd].emplace();
+    file_descriptors[fd] = FileDescriptor{};
+    FileDescriptor& descriptor = *file_descriptors[fd];
     // ENONMEM might be thrown here
 
     LOG_INFO(Service, "New socket fd={}", fd);
@@ -548,7 +549,8 @@ std::pair<s32, Errno> BSD::AcceptImpl(s32 fd, std::vector<u8>& write_buffer) {
         return {-1, Translate(bsd_errno)};
     }
 
-    FileDescriptor& new_descriptor = file_descriptors[new_fd].emplace();
+    file_descriptors[new_fd] = FileDescriptor{};
+    FileDescriptor& new_descriptor = *file_descriptors[new_fd];
     new_descriptor.socket = std::move(result.socket);
     new_descriptor.is_connection_based = descriptor.is_connection_based;
 
diff --git a/src/core/settings.h b/src/core/settings.h
index a324530bd..d849dded3 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -181,12 +181,13 @@ struct Values {
     std::string motion_device;
     std::string udp_input_servers;
 
-    bool emulate_analog_keyboard;
-
+    bool mouse_panning;
+    float mouse_panning_sensitivity;
     bool mouse_enabled;
     std::string mouse_device;
     MouseButtonsRaw mouse_buttons;
 
+    bool emulate_analog_keyboard;
     bool keyboard_enabled;
     KeyboardKeysRaw keyboard_keys;
     KeyboardModsRaw keyboard_mods;
diff --git a/src/input_common/mouse/mouse_input.cpp b/src/input_common/mouse/mouse_input.cpp
index 10786a541..b864d26f2 100644
--- a/src/input_common/mouse/mouse_input.cpp
+++ b/src/input_common/mouse/mouse_input.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2+
 // Refer to the license.txt file included.
 
+#include "core/settings.h"
 #include "input_common/mouse/mouse_input.h"
 
 namespace MouseInput {
@@ -32,10 +33,18 @@ void Mouse::UpdateThread() {
             info.motion.UpdateOrientation(update_time * 1000);
             info.tilt_speed = 0;
             info.data.motion = info.motion.GetMotion();
+            if (Settings::values.mouse_panning) {
+                info.last_mouse_change *= 0.96f;
+                info.data.axis = {static_cast<int>(16 * info.last_mouse_change.x),
+                                  static_cast<int>(16 * -info.last_mouse_change.y)};
+            }
         }
         if (configuring) {
             UpdateYuzuSettings();
         }
+        if (mouse_panning_timout++ > 20) {
+            StopPanning();
+        }
         std::this_thread::sleep_for(std::chrono::milliseconds(update_time));
     }
 }
@@ -65,8 +74,45 @@ void Mouse::PressButton(int x, int y, int button_) {
     mouse_info[button_index].data.pressed = true;
 }
 
-void Mouse::MouseMove(int x, int y) {
+void Mouse::StopPanning() {
     for (MouseInfo& info : mouse_info) {
+        if (Settings::values.mouse_panning) {
+            info.data.axis = {};
+            info.tilt_speed = 0;
+            info.last_mouse_change = {};
+        }
+    }
+}
+
+void Mouse::MouseMove(int x, int y, int center_x, int center_y) {
+    for (MouseInfo& info : mouse_info) {
+        if (Settings::values.mouse_panning) {
+            auto mouse_change =
+                (Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>();
+            mouse_panning_timout = 0;
+
+            if (mouse_change.y == 0 && mouse_change.x == 0) {
+                continue;
+            }
+            const auto mouse_change_length = mouse_change.Length();
+            if (mouse_change_length < 3.0f) {
+                mouse_change /= mouse_change_length / 3.0f;
+            }
+
+            info.last_mouse_change = (info.last_mouse_change * 0.91f) + (mouse_change * 0.09f);
+
+            const auto last_mouse_change_length = info.last_mouse_change.Length();
+            if (last_mouse_change_length > 8.0f) {
+                info.last_mouse_change /= last_mouse_change_length / 8.0f;
+            } else if (last_mouse_change_length < 1.0f) {
+                info.last_mouse_change = mouse_change / mouse_change.Length();
+            }
+
+            info.tilt_direction = info.last_mouse_change;
+            info.tilt_speed = info.tilt_direction.Normalize() * info.sensitivity;
+            continue;
+        }
+
         if (info.data.pressed) {
             const auto mouse_move = Common::MakeVec(x, y) - info.mouse_origin;
             const auto mouse_change = Common::MakeVec(x, y) - info.last_mouse_position;
diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h
index 58803c1bf..46aa676c1 100644
--- a/src/input_common/mouse/mouse_input.h
+++ b/src/input_common/mouse/mouse_input.h
@@ -57,8 +57,10 @@ public:
      * Signals that mouse has moved.
      * @param x the x-coordinate of the cursor
      * @param y the y-coordinate of the cursor
+     * @param center_x the x-coordinate of the middle of the screen
+     * @param center_y the y-coordinate of the middle of the screen
      */
-    void MouseMove(int x, int y);
+    void MouseMove(int x, int y, int center_x, int center_y);
 
     /**
      * Signals that a motion sensor tilt has ended.
@@ -74,11 +76,13 @@ public:
 private:
     void UpdateThread();
     void UpdateYuzuSettings();
+    void StopPanning();
 
     struct MouseInfo {
         InputCommon::MotionInput motion{0.0f, 0.0f, 0.0f};
         Common::Vec2<int> mouse_origin;
         Common::Vec2<int> last_mouse_position;
+        Common::Vec2<float> last_mouse_change;
         bool is_tilting = false;
         float sensitivity{0.120f};
 
@@ -94,5 +98,6 @@ private:
     Common::SPSCQueue<MouseStatus> mouse_queue;
     bool configuring{false};
     bool update_thread_running{true};
+    int mouse_panning_timout{};
 };
 } // namespace MouseInput
diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp
index 3d799b293..bb56787ee 100644
--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -6,6 +6,7 @@
 #include <utility>
 
 #include "common/threadsafe_queue.h"
+#include "core/settings.h"
 #include "input_common/mouse/mouse_input.h"
 #include "input_common/mouse/mouse_poller.h"
 
@@ -71,7 +72,7 @@ public:
         std::lock_guard lock{mutex};
         const auto axis_value =
             static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis));
-        return axis_value / (100.0f * range);
+        return axis_value * Settings::values.mouse_panning_sensitivity / (100.0f * range);
     }
 
     std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
index f67de37e3..a88ae452f 100644
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -717,6 +717,13 @@ SDLState::SDLState() {
     if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
         LOG_ERROR(Input, "Failed to set hint for background events with: {}", SDL_GetError());
     }
+// these hints are only defined on sdl2.0.9 or higher
+#if SDL_VERSION_ATLEAST(2, 0, 9)
+#if !SDL_VERSION_ATLEAST(2, 0, 12)
+    // There are also hints to toggle the individual drivers if needed.
+    SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI, "0");
+#endif
+#endif
 
     SDL_AddEventWatch(&SDLEventWatcher, this);
 
diff --git a/src/input_common/settings.h b/src/input_common/settings.h
index 75486554b..a59f5d461 100644
--- a/src/input_common/settings.h
+++ b/src/input_common/settings.h
@@ -340,6 +340,7 @@ enum class ControllerType {
     LeftJoycon,
     RightJoycon,
     Handheld,
+    GameCube,
 };
 
 struct PlayerInput {
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
index 651633e9e..edced69bb 100644
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") {
     REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
     REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
 }
+
+TEST_CASE("BufferBase: Cached write") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); // Cached write is not visible yet
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); // Flushing publishes the write
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Multiple cached write") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    buffer.CachedCpuWrite(c + PAGE * 3, PAGE); // Second, non-adjacent pending page
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
+    buffer.FlushCachedWrites(); // One flush publishes both pending pages
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write unmarked") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); // Must not discard the pending write
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); // Pending write survived the unmark
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write iterated") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    int num = 0;
+    buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+    REQUIRE(num == 0); // A pending cached write is not reported as an upload range
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); // Iterating did not consume the write
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write downloads") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 64);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    REQUIRE(rasterizer.Count() == 63); // Count drops by one as soon as the write is cached
+    buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
+    int num = 0;
+    buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+    buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+    REQUIRE(num == 0); // Neither iteration may report the page with the pending write
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); // Flushing only sets the CPU state
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index dd4c29ed3..9b931976a 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,10 +2,8 @@ add_subdirectory(host_shaders)
 
 add_library(video_core STATIC
     buffer_cache/buffer_base.h
-    buffer_cache/buffer_block.h
+    buffer_cache/buffer_cache.cpp
     buffer_cache/buffer_cache.h
-    buffer_cache/map_interval.cpp
-    buffer_cache/map_interval.h
     cdma_pusher.cpp
     cdma_pusher.h
     command_classes/codecs/codec.cpp
@@ -152,8 +150,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_staging_buffer_pool.h
     renderer_vulkan/vk_state_tracker.cpp
     renderer_vulkan/vk_state_tracker.h
-    renderer_vulkan/vk_stream_buffer.cpp
-    renderer_vulkan/vk_stream_buffer.h
     renderer_vulkan/vk_swapchain.cpp
     renderer_vulkan/vk_swapchain.h
     renderer_vulkan/vk_texture_cache.cpp
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index ee8602ce9..0c00ae280 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -19,6 +19,7 @@ namespace VideoCommon {
 
 enum class BufferFlagBits {
     Picked = 1 << 0,
+    CachedWrites = 1 << 1,
 };
 DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
 
@@ -40,7 +41,7 @@ class BufferBase {
     static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
 
     /// Vector tracking modified pages tightly packed with small vector optimization
-    union WrittenWords {
+    union WordsArray {
         /// Returns the pointer to the words state
         [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
             return is_short ? &stack : heap;
@@ -55,49 +56,59 @@ class BufferBase {
         u64* heap;     ///< Not-small buffers pointer to the storage
     };
 
-    struct GpuCpuWords {
-        explicit GpuCpuWords() = default;
-        explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} {
+    struct Words {
+        explicit Words() = default;
+        explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
             if (IsShort()) {
                 cpu.stack = ~u64{0};
                 gpu.stack = 0;
+                cached_cpu.stack = 0;
+                untracked.stack = ~u64{0};
             } else {
                 // Share allocation between CPU and GPU pages and set their default values
                 const size_t num_words = NumWords();
-                u64* const alloc = new u64[num_words * 2];
+                u64* const alloc = new u64[num_words * 4];
                 cpu.heap = alloc;
                 gpu.heap = alloc + num_words;
+                cached_cpu.heap = alloc + num_words * 2;
+                untracked.heap = alloc + num_words * 3;
                 std::fill_n(cpu.heap, num_words, ~u64{0});
                 std::fill_n(gpu.heap, num_words, 0);
+                std::fill_n(cached_cpu.heap, num_words, 0);
+                std::fill_n(untracked.heap, num_words, ~u64{0});
             }
             // Clean up tailing bits
-            const u64 last_local_page =
-                Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE);
+            const u64 last_word_size = size_bytes % BYTES_PER_WORD;
+            const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
             const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
-            u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1];
-            last_word = (last_word << shift) >> shift;
+            const u64 last_word = (~u64{0} << shift) >> shift;
+            cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
+            untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
         }
 
-        ~GpuCpuWords() {
+        ~Words() {
             Release();
         }
 
-        GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept {
+        Words& operator=(Words&& rhs) noexcept {
             Release();
             size_bytes = rhs.size_bytes;
             cpu = rhs.cpu;
             gpu = rhs.gpu;
+            cached_cpu = rhs.cached_cpu;
+            untracked = rhs.untracked;
             rhs.cpu.heap = nullptr;
             return *this;
         }
 
-        GpuCpuWords(GpuCpuWords&& rhs) noexcept
-            : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} {
+        Words(Words&& rhs) noexcept
+            : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
+              cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
             rhs.cpu.heap = nullptr;
         }
 
-        GpuCpuWords& operator=(const GpuCpuWords&) = delete;
-        GpuCpuWords(const GpuCpuWords&) = delete;
+        Words& operator=(const Words&) = delete;
+        Words(const Words&) = delete;
 
         /// Returns true when the buffer fits in the small vector optimization
         [[nodiscard]] bool IsShort() const noexcept {
@@ -118,8 +129,17 @@ class BufferBase {
         }
 
         u64 size_bytes = 0;
-        WrittenWords cpu;
-        WrittenWords gpu;
+        WordsArray cpu;
+        WordsArray gpu;
+        WordsArray cached_cpu;
+        WordsArray untracked;
+    };
+
+    enum class Type {
+        CPU,       // Selects words.cpu
+        GPU,       // Selects words.gpu
+        CachedCPU, // Selects words.cached_cpu
+        Untracked, // Selects words.untracked
+    };
 
 public:
@@ -132,68 +152,93 @@ public:
     BufferBase& operator=(const BufferBase&) = delete;
     BufferBase(const BufferBase&) = delete;
 
+    BufferBase& operator=(BufferBase&&) = default;
+    BufferBase(BufferBase&&) = default;
+
     /// Returns the inclusive CPU modified range in a begin end pair
     [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
                                                         u64 query_size) const noexcept {
         const u64 offset = query_cpu_addr - cpu_addr;
-        return ModifiedRegion<false>(offset, query_size);
+        return ModifiedRegion<Type::CPU>(offset, query_size);
     }
 
     /// Returns the inclusive GPU modified range in a begin end pair
     [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
                                                         u64 query_size) const noexcept {
         const u64 offset = query_cpu_addr - cpu_addr;
-        return ModifiedRegion<true>(offset, query_size);
+        return ModifiedRegion<Type::GPU>(offset, query_size);
     }
 
     /// Returns true if a region has been modified from the CPU
     [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
         const u64 offset = query_cpu_addr - cpu_addr;
-        return IsRegionModified<false>(offset, query_size);
+        return IsRegionModified<Type::CPU>(offset, query_size);
     }
 
     /// Returns true if a region has been modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
         const u64 offset = query_cpu_addr - cpu_addr;
-        return IsRegionModified<true>(offset, query_size);
+        return IsRegionModified<Type::GPU>(offset, query_size);
     }
 
     /// Mark region as CPU modified, notifying the rasterizer about this change
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
-        ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
     }
 
     /// Unmark region as CPU modified, notifying the rasterizer about this change
     void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
-        ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
     }
 
     /// Mark region as modified from the host GPU
     void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
-        ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
     }
 
     /// Unmark region as modified from the host GPU
     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
-        ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
+    }
+
+    /// Record a pending CPU write: sets the CachedWrites flag and the cached/untracked bits,
+    /// but leaves the CPU-modified bits untouched until FlushCachedWrites is called.
+    void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
+        flags |= BufferFlagBits::CachedWrites;
+        ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
+    }
+
+    /// Flushes cached CPU writes, notifies the rasterizer about the deltas and clears them
+    void FlushCachedWrites() noexcept {
+        flags &= ~BufferFlagBits::CachedWrites;
+        u64* const cached_words = Array<Type::CachedCPU>();
+        u64* const untracked_words = Array<Type::Untracked>();
+        u64* const cpu_words = Array<Type::CPU>();
+        for (u64 word_index = 0, num_words = NumWords(); word_index < num_words; ++word_index) {
+            const u64 cached_bits = cached_words[word_index];
+            NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
+            untracked_words[word_index] |= cached_bits;
+            cpu_words[word_index] |= cached_bits;
+            cached_words[word_index] = 0; // Consumed: a later flush must not re-dirty these pages
+        }
+    }
 
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
     template <typename Func>
     void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
-        ForEachModifiedRange<false, true>(query_cpu_range, size, func);
+        ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
     }
 
     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
     template <typename Func>
     void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
-        ForEachModifiedRange<true, false>(query_cpu_range, size, func);
+        ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
     }
 
     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
     template <typename Func>
     void ForEachDownloadRange(Func&& func) {
-        ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func);
+        ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
     }
 
     /// Mark buffer as picked
@@ -206,6 +251,16 @@ public:
         flags &= ~BufferFlagBits::Picked;
     }
 
+    /// Adds 'score' to the stream score, the likeliness of this being a stream buffer
+    void IncreaseStreamScore(int score) noexcept {
+        stream_score += score;
+    }
+
+    /// Returns the accumulated stream score (likeliness of this being a stream buffer)
+    [[nodiscard]] int StreamScore() const noexcept {
+        return stream_score;
+    }
+
     /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
     [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
         return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
@@ -216,6 +271,11 @@ public:
         return True(flags & BufferFlagBits::Picked);
     }
 
+    /// Returns true while the CachedWrites flag is set (CachedCpuWrite sets it, a flush clears it)
+    [[nodiscard]] bool HasCachedWrites() const noexcept {
+        return True(flags & BufferFlagBits::CachedWrites);
+    }
+
     /// Returns the base CPU address of the buffer
     [[nodiscard]] VAddr CpuAddr() const noexcept {
         return cpu_addr;
@@ -233,26 +293,48 @@ public:
     }
 
 private:
+    template <Type type>
+    u64* Array() noexcept { // Mutable word storage selected by 'type' (stack or heap per IsShort)
+        if constexpr (type == Type::CPU) {
+            return words.cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::GPU) {
+            return words.gpu.Pointer(IsShort());
+        } else if constexpr (type == Type::CachedCPU) {
+            return words.cached_cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::Untracked) { // Last enumerator: every Type returns
+            return words.untracked.Pointer(IsShort());
+        }
+    }
+
+    template <Type type>
+    const u64* Array() const noexcept { // Read-only counterpart of the mutable overload
+        if constexpr (type == Type::CPU) {
+            return words.cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::GPU) {
+            return words.gpu.Pointer(IsShort());
+        } else if constexpr (type == Type::CachedCPU) {
+            return words.cached_cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::Untracked) { // Last enumerator: every Type returns
+            return words.untracked.Pointer(IsShort());
+        }
+    }
+
     /**
      * Change the state of a range of pages
      *
-     * @param written_words Pages to be marked or unmarked as modified
      * @param dirty_addr    Base address to mark or unmark as modified
      * @param size          Size in bytes to mark or unmark as modified
-     *
-     * @tparam enable            True when the bits will be set to one, false for zero
-     * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
      */
-    template <bool enable, bool notify_rasterizer>
-    void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr,
-                           s64 size) noexcept(!notify_rasterizer) {
+    template <Type type, bool enable>
+    void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
         const s64 difference = dirty_addr - cpu_addr;
         const u64 offset = std::max<s64>(difference, 0);
         size += std::min<s64>(difference, 0);
         if (offset >= SizeBytes() || size < 0) {
             return;
         }
-        u64* const state_words = written_words.Pointer(IsShort());
+        u64* const untracked_words = Array<Type::Untracked>();
+        u64* const state_words = Array<type>();
         const u64 offset_end = std::min(offset + size, SizeBytes());
         const u64 begin_page_index = offset / BYTES_PER_PAGE;
         const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
@@ -268,13 +350,19 @@ private:
             u64 bits = ~u64{0};
             bits = (bits >> right_offset) << right_offset;
             bits = (bits << left_offset) >> left_offset;
-            if constexpr (notify_rasterizer) {
-                NotifyRasterizer<!enable>(word_index, state_words[word_index], bits);
+            if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+                NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
             }
             if constexpr (enable) {
                 state_words[word_index] |= bits;
+                if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+                    untracked_words[word_index] |= bits;
+                }
             } else {
                 state_words[word_index] &= ~bits;
+                if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+                    untracked_words[word_index] &= ~bits;
+                }
             }
             page_index = 0;
             ++word_index;
@@ -291,7 +379,7 @@ private:
      * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
      */
     template <bool add_to_rasterizer>
-    void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) {
+    void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
         u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
         VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
         while (changed_bits != 0) {
@@ -315,21 +403,20 @@ private:
      * @param query_cpu_range Base CPU address to loop over
      * @param size            Size in bytes of the CPU range to loop over
      * @param func            Function to call for each turned off region
-     *
-     * @tparam gpu               True for host GPU pages, false for CPU pages
-     * @tparam notify_rasterizer True when the rasterizer should be notified about state changes
      */
-    template <bool gpu, bool notify_rasterizer, typename Func>
+    template <Type type, typename Func>
     void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+        static_assert(type != Type::Untracked);
+
         const s64 difference = query_cpu_range - cpu_addr;
         const u64 query_begin = std::max<s64>(difference, 0);
         size += std::min<s64>(difference, 0);
         if (query_begin >= SizeBytes() || size < 0) {
             return;
         }
-        const u64* const cpu_words = words.cpu.Pointer(IsShort());
+        u64* const untracked_words = Array<Type::Untracked>();
+        u64* const state_words = Array<type>();
         const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
-        u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
         u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
         u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
 
@@ -345,7 +432,8 @@ private:
         const u64 word_index_end = std::distance(state_words, last_modified_word);
 
         const unsigned local_page_begin = std::countr_zero(*first_modified_word);
-        const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]);
+        const unsigned local_page_end =
+            static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
         const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
         const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
         const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
@@ -371,11 +459,13 @@ private:
             const u64 current_word = state_words[word_index] & bits;
             state_words[word_index] &= ~bits;
 
-            // Exclude CPU modified pages when visiting GPU pages
-            const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0);
-            if constexpr (notify_rasterizer) {
-                NotifyRasterizer<true>(word_index, word, ~u64{0});
+            if constexpr (type == Type::CPU) {
+                const u64 current_bits = untracked_words[word_index] & bits;
+                untracked_words[word_index] &= ~bits;
+                NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
             }
+            // Exclude CPU modified pages when visiting GPU pages
+            const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
             u64 page = page_begin;
             page_begin = 0;
 
@@ -416,17 +506,20 @@ private:
      * @param offset Offset in bytes from the start of the buffer
      * @param size   Size in bytes of the region to query for modifications
      */
-    template <bool gpu>
+    template <Type type>
     [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
-        const u64* const cpu_words = words.cpu.Pointer(IsShort());
-        const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+        static_assert(type != Type::Untracked);
+
+        const u64* const untracked_words = Array<Type::Untracked>();
+        const u64* const state_words = Array<type>();
         const u64 num_query_words = size / BYTES_PER_WORD + 1;
         const u64 word_begin = offset / BYTES_PER_WORD;
         const u64 word_end = std::min(word_begin + num_query_words, NumWords());
         const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
         u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
         for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
-            const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
+            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+            const u64 word = state_words[word_index] & ~off_word;
             if (word == 0) {
                 continue;
             }
@@ -445,13 +538,13 @@ private:
      *
      * @param offset Offset in bytes from the start of the buffer
      * @param size   Size in bytes of the region to query for modifications
-     *
-     * @tparam gpu True to query GPU modified pages, false for CPU pages
      */
-    template <bool gpu>
+    template <Type type>
     [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
-        const u64* const cpu_words = words.cpu.Pointer(IsShort());
-        const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+        static_assert(type != Type::Untracked);
+
+        const u64* const untracked_words = Array<Type::Untracked>();
+        const u64* const state_words = Array<type>();
         const u64 num_query_words = size / BYTES_PER_WORD + 1;
         const u64 word_begin = offset / BYTES_PER_WORD;
         const u64 word_end = std::min(word_begin + num_query_words, NumWords());
@@ -460,7 +553,8 @@ private:
         u64 begin = std::numeric_limits<u64>::max();
         u64 end = 0;
         for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
-            const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
+            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+            const u64 word = state_words[word_index] & ~off_word;
             if (word == 0) {
                 continue;
             }
@@ -488,8 +582,9 @@ private:
 
     RasterizerInterface* rasterizer = nullptr;
     VAddr cpu_addr = 0;
-    GpuCpuWords words;
+    Words words;
     BufferFlagBits flags{};
+    int stream_score = 0;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
deleted file mode 100644
index e9306194a..000000000
--- a/src/video_core/buffer_cache/buffer_block.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCommon {
-
-class BufferBlock {
-public:
-    [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
-        return (cpu_addr < end) && (cpu_addr_end > start);
-    }
-
-    [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
-        return cpu_addr <= other_start && other_end <= cpu_addr_end;
-    }
-
-    [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
-        return static_cast<std::size_t>(in_addr - cpu_addr);
-    }
-
-    [[nodiscard]] VAddr CpuAddr() const {
-        return cpu_addr;
-    }
-
-    [[nodiscard]] VAddr CpuAddrEnd() const {
-        return cpu_addr_end;
-    }
-
-    void SetCpuAddr(VAddr new_addr) {
-        cpu_addr = new_addr;
-        cpu_addr_end = new_addr + size;
-    }
-
-    [[nodiscard]] std::size_t Size() const {
-        return size;
-    }
-
-    [[nodiscard]] u64 Epoch() const {
-        return epoch;
-    }
-
-    void SetEpoch(u64 new_epoch) {
-        epoch = new_epoch;
-    }
-
-protected:
-    explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
-        SetCpuAddr(cpu_addr_);
-    }
-
-private:
-    VAddr cpu_addr{};
-    VAddr cpu_addr_end{};
-    std::size_t size{};
-    u64 epoch{};
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
new file mode 100644
index 000000000..ab32294c8
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/microprofile.h"
+
+namespace VideoCommon {
+
+MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83b9ee871..2a6844ab1 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,591 +4,1289 @@
 
 #pragma once
 
-#include <list>
+#include <algorithm>
+#include <array>
+#include <deque>
 #include <memory>
 #include <mutex>
+#include <span>
 #include <unordered_map>
-#include <unordered_set>
-#include <utility>
 #include <vector>
 
 #include <boost/container/small_vector.hpp>
-#include <boost/icl/interval_set.hpp>
-#include <boost/intrusive/set.hpp>
 
-#include "common/alignment.h"
-#include "common/assert.h"
 #include "common/common_types.h"
-#include "common/logging/log.h"
-#include "core/core.h"
+#include "common/div_ceil.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
 #include "core/memory.h"
 #include "core/settings.h"
-#include "video_core/buffer_cache/buffer_block.h"
-#include "video_core/buffer_cache/map_interval.h"
+#include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/delayed_destruction_ring.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/texture_cache/slot_vector.h"
+#include "video_core/texture_cache/types.h"
 
 namespace VideoCommon {
 
-template <typename Buffer, typename BufferType, typename StreamBuffer>
+MICROPROFILE_DECLARE(GPU_PrepareBuffers);
+MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
+MICROPROFILE_DECLARE(GPU_DownloadMemory);
+
+using BufferId = SlotId;
+
+constexpr u32 NUM_VERTEX_BUFFERS = 32;
+constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
+constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
+constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
+constexpr u32 NUM_STORAGE_BUFFERS = 16;
+constexpr u32 NUM_STAGES = 5;
+
+template <typename P>
 class BufferCache {
-    using IntervalSet = boost::icl::interval_set<VAddr>;
-    using IntervalType = typename IntervalSet::interval_type;
-    using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+    // Page size for caching purposes.
+    // This is unrelated to the CPU page size and can be tuned for performance.
+    static constexpr u32 PAGE_BITS = 16;
+    static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS;
+
+    static constexpr bool IS_OPENGL = P::IS_OPENGL;
+    static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
+        P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
+    static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
+        P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
+    static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
+    static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
+    static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
+
+    static constexpr BufferId NULL_BUFFER_ID{0};
+
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+    using Runtime = typename P::Runtime;
+    using Buffer = typename P::Buffer;
+
+    struct Empty {};
+
+    struct OverlapResult {
+        std::vector<BufferId> ids;
+        VAddr begin;
+        VAddr end;
+        bool has_stream_leap = false;
+    };
 
-    static constexpr u64 WRITE_PAGE_BIT = 11;
-    static constexpr u64 BLOCK_PAGE_BITS = 21;
-    static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+    struct Binding {
+        VAddr cpu_addr{};
+        u32 size{};
+        BufferId buffer_id;
+    };
 
-public:
-    struct BufferInfo {
-        BufferType handle;
-        u64 offset;
-        u64 address;
+    static constexpr Binding NULL_BINDING{
+        .cpu_addr = 0,
+        .size = 0,
+        .buffer_id = NULL_BUFFER_ID,
     };
 
-    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
-                            bool is_written = false, bool use_fast_cbuf = false) {
-        std::lock_guard lock{mutex};
+public:
+    static constexpr u32 SKIP_CACHE_SIZE = 4096;
 
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr) {
-            return GetEmptyBuffer(size);
-        }
+    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+                         Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+                         Runtime& runtime_);
 
-        // Cache management is a big overhead, so only cache entries with a given size.
-        // TODO: Figure out which size is the best for given games.
-        constexpr std::size_t max_stream_size = 0x800;
-        if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
-                const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
-                if (use_fast_cbuf) {
-                    u8* dest;
-                    if (is_granular) {
-                        dest = gpu_memory.GetPointer(gpu_addr);
-                    } else {
-                        staging_buffer.resize(size);
-                        dest = staging_buffer.data();
-                        gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
-                    }
-                    return ConstBufferUpload(dest, size);
-                }
-                if (is_granular) {
-                    u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
-                    return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
-                        std::memcpy(dest, host_ptr, size);
-                    });
-                } else {
-                    return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
-                        gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
-                    });
-                }
-            }
-        }
+    void TickFrame();
 
-        Buffer* const block = GetBlock(*cpu_addr, size);
-        MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
-        if (!map) {
-            return GetEmptyBuffer(size);
-        }
-        if (is_written) {
-            map->MarkAsModified(true, GetModifiedTicks());
-            if (Settings::IsGPULevelHigh() &&
-                Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-                MarkForAsyncFlush(map);
-            }
-            if (!map->is_written) {
-                map->is_written = true;
-                MarkRegionAsWritten(map->start, map->end - 1);
-            }
-        }
+    void WriteMemory(VAddr cpu_addr, u64 size);
 
-        return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
-    }
+    void CachedWriteMemory(VAddr cpu_addr, u64 size);
 
-    /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
-    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
-                                std::size_t alignment = 4) {
-        std::lock_guard lock{mutex};
-        return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
-            std::memcpy(dest, raw_pointer, size);
-        });
-    }
+    void DownloadMemory(VAddr cpu_addr, u64 size);
 
-    /// Prepares the buffer cache for data uploading
-    /// @param max_size Maximum number of bytes that will be uploaded
-    /// @return True when a stream buffer invalidation was required, false otherwise
-    void Map(std::size_t max_size) {
-        std::lock_guard lock{mutex};
+    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
 
-        std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
-        buffer_offset = buffer_offset_base;
-    }
+    void UpdateGraphicsBuffers(bool is_indexed);
 
-    /// Finishes the upload stream
-    void Unmap() {
-        std::lock_guard lock{mutex};
-        stream_buffer.Unmap(buffer_offset - buffer_offset_base);
-    }
+    void UpdateComputeBuffers();
 
-    /// Function called at the end of each frame, inteded for deferred operations
-    void TickFrame() {
-        ++epoch;
+    void BindHostGeometryBuffers(bool is_indexed);
 
-        while (!pending_destruction.empty()) {
-            // Delay at least 4 frames before destruction.
-            // This is due to triple buffering happening on some drivers.
-            static constexpr u64 epochs_to_destroy = 5;
-            if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
-                break;
-            }
-            pending_destruction.pop();
-        }
-    }
+    void BindHostStageBuffers(size_t stage);
 
-    /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
+    void BindHostComputeBuffers();
 
-        VectorMapInterval objects = GetMapsInRange(addr, size);
-        std::sort(objects.begin(), objects.end(),
-                  [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
-        for (MapInterval* object : objects) {
-            if (object->is_modified && object->is_registered) {
-                mutex.unlock();
-                FlushMap(object);
-                mutex.lock();
-            }
-        }
-    }
+    void SetEnabledUniformBuffers(size_t stage, u32 enabled);
 
-    bool MustFlushRegion(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
+    void SetEnabledComputeUniformBuffers(u32 enabled);
 
-        const VectorMapInterval objects = GetMapsInRange(addr, size);
-        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
-            return map->is_modified && map->is_registered;
-        });
-    }
+    void UnbindGraphicsStorageBuffers(size_t stage);
 
-    /// Mark the specified region as being invalidated
-    void InvalidateRegion(VAddr addr, u64 size) {
-        std::lock_guard lock{mutex};
+    void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+                                   bool is_written);
 
-        for (auto& object : GetMapsInRange(addr, size)) {
-            if (object->is_registered) {
-                Unregister(object);
-            }
-        }
-    }
+    void UnbindComputeStorageBuffers();
 
-    void OnCPUWrite(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
+    void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+                                  bool is_written);
 
-        for (MapInterval* object : GetMapsInRange(addr, size)) {
-            if (object->is_memory_marked && object->is_registered) {
-                UnmarkMemory(object);
-                object->is_sync_pending = true;
-                marked_for_unregister.emplace_back(object);
-            }
-        }
-    }
+    void FlushCachedWrites();
 
-    void SyncGuestHost() {
-        std::lock_guard lock{mutex};
+    /// Return true when there are uncommitted buffers to be downloaded
+    [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
 
-        for (auto& object : marked_for_unregister) {
-            if (object->is_registered) {
-                object->is_sync_pending = false;
-                Unregister(object);
-            }
+    /// Return true when the caller should wait for async downloads
+    [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
+
+    /// Commit asynchronous downloads
+    void CommitAsyncFlushes();
+
+    /// Pop asynchronous downloads
+    void PopAsyncFlushes();
+
+    /// Return true when a CPU region is modified from the GPU
+    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+
+    std::mutex mutex;
+
+private:
+    template <typename Func>
+    static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
+        for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
+            const int disabled_bits = std::countr_zero(enabled_mask);
+            index += disabled_bits;
+            enabled_mask >>= disabled_bits;
+            func(index);
         }
-        marked_for_unregister.clear();
     }
 
-    void CommitAsyncFlushes() {
-        if (uncommitted_flushes) {
-            auto commit_list = std::make_shared<std::list<MapInterval*>>();
-            for (MapInterval* map : *uncommitted_flushes) {
-                if (map->is_registered && map->is_modified) {
-                    // TODO(Blinkhawk): Implement backend asynchronous flushing
-                    // AsyncFlushMap(map)
-                    commit_list->push_back(map);
-                }
-            }
-            if (!commit_list->empty()) {
-                committed_flushes.push_back(commit_list);
-            } else {
-                committed_flushes.emplace_back();
+    template <typename Func>
+    void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
+        const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE);
+        for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) {
+            const BufferId buffer_id = page_table[page];
+            if (!buffer_id) {
+                ++page;
+                continue;
             }
-        } else {
-            committed_flushes.emplace_back();
+            Buffer& buffer = slot_buffers[buffer_id];
+            func(buffer_id, buffer);
+
+            const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+            page = Common::DivCeil(end_addr, PAGE_SIZE);
         }
-        uncommitted_flushes.reset();
     }
 
-    bool ShouldWaitAsyncFlushes() const {
-        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+    static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
+        return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
+               ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
     }
 
-    bool HasUncommittedFlushes() const {
-        return uncommitted_flushes != nullptr;
-    }
+    void BindHostIndexBuffer();
 
-    void PopAsyncFlushes() {
-        if (committed_flushes.empty()) {
-            return;
-        }
-        auto& flush_list = committed_flushes.front();
-        if (!flush_list) {
-            committed_flushes.pop_front();
-            return;
-        }
-        for (MapInterval* map : *flush_list) {
-            if (map->is_registered) {
-                // TODO(Blinkhawk): Replace this for reading the asynchronous flush
-                FlushMap(map);
-            }
-        }
-        committed_flushes.pop_front();
-    }
+    void BindHostVertexBuffers();
 
-    virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
+    void BindHostGraphicsUniformBuffers(size_t stage);
 
-protected:
-    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         StreamBuffer& stream_buffer_)
-        : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
-          stream_buffer{stream_buffer_} {}
+    void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
 
-    ~BufferCache() = default;
+    void BindHostGraphicsStorageBuffers(size_t stage);
 
-    virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
+    void BindHostTransformFeedbackBuffers();
 
-    virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
-        return {};
-    }
+    void BindHostComputeUniformBuffers();
 
-    /// Register an object into the cache
-    MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
-        const VAddr cpu_addr = new_map.start;
-        if (!cpu_addr) {
-            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
-                         new_map.gpu_addr);
-            return nullptr;
-        }
-        const std::size_t size = new_map.end - new_map.start;
-        new_map.is_registered = true;
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
-        new_map.is_memory_marked = true;
-        if (inherit_written) {
-            MarkRegionAsWritten(new_map.start, new_map.end - 1);
-            new_map.is_written = true;
-        }
-        MapInterval* const storage = mapped_addresses_allocator.Allocate();
-        *storage = new_map;
-        mapped_addresses.insert(*storage);
-        return storage;
-    }
+    void BindHostComputeStorageBuffers();
 
-    void UnmarkMemory(MapInterval* map) {
-        if (!map->is_memory_marked) {
-            return;
-        }
-        const std::size_t size = map->end - map->start;
-        rasterizer.UpdatePagesCachedCount(map->start, size, -1);
-        map->is_memory_marked = false;
-    }
-
-    /// Unregisters an object from the cache
-    void Unregister(MapInterval* map) {
-        UnmarkMemory(map);
-        map->is_registered = false;
-        if (map->is_sync_pending) {
-            map->is_sync_pending = false;
-            marked_for_unregister.remove(map);
+    void DoUpdateGraphicsBuffers(bool is_indexed);
+
+    void DoUpdateComputeBuffers();
+
+    void UpdateIndexBuffer();
+
+    void UpdateVertexBuffers();
+
+    void UpdateVertexBuffer(u32 index);
+
+    void UpdateUniformBuffers(size_t stage);
+
+    void UpdateStorageBuffers(size_t stage);
+
+    void UpdateTransformFeedbackBuffers();
+
+    void UpdateTransformFeedbackBuffer(u32 index);
+
+    void UpdateComputeUniformBuffers();
+
+    void UpdateComputeStorageBuffers();
+
+    void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
+
+    [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
+
+    [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
+
+    void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
+
+    [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
+
+    void Register(BufferId buffer_id);
+
+    void Unregister(BufferId buffer_id);
+
+    template <bool insert>
+    void ChangeRegister(BufferId buffer_id);
+
+    void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
+
+    void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
+
+    void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+                      std::span<BufferCopy> copies);
+
+    void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+                               std::span<const BufferCopy> copies);
+
+    void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
+
+    void DeleteBuffer(BufferId buffer_id);
+
+    void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
+
+    void NotifyBufferDeletion();
+
+    [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+
+    [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
+
+    [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
+
+    [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
+
+    VideoCore::RasterizerInterface& rasterizer;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+    Core::Memory::Memory& cpu_memory;
+    Runtime& runtime;
+
+    SlotVector<Buffer> slot_buffers;
+    DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
+
+    u32 last_index_count = 0;
+
+    Binding index_buffer;
+    std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
+    std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
+    std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
+    std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
+
+    std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
+    std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
+
+    std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
+    u32 enabled_compute_uniform_buffers = 0;
+
+    std::array<u32, NUM_STAGES> enabled_storage_buffers{};
+    std::array<u32, NUM_STAGES> written_storage_buffers{};
+    u32 enabled_compute_storage_buffers = 0;
+    u32 written_compute_storage_buffers = 0;
+
+    std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
+
+    bool has_deleted_buffers = false;
+
+    std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
+        dirty_uniform_buffers{};
+
+    std::vector<BufferId> cached_write_buffer_ids;
+
+    // TODO: This data structure is not optimal and it should be reworked
+    std::vector<BufferId> uncommitted_downloads;
+    std::deque<std::vector<BufferId>> committed_downloads;
+
+    size_t immediate_buffer_capacity = 0;
+    std::unique_ptr<u8[]> immediate_buffer_alloc;
+
+    std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
+};
+
+template <class P>
+BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+                            Tegra::Engines::Maxwell3D& maxwell3d_,
+                            Tegra::Engines::KeplerCompute& kepler_compute_,
+                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+                            Runtime& runtime_)
+    : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
+      gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
+    // Ensure the first slot is used for the null buffer
+    void(slot_buffers.insert(runtime, NullBufferParams{}));
+}
+
+template <class P>
+void BufferCache<P>::TickFrame() {
+    delayed_destruction_ring.Tick();
+}
+
+template <class P>
+void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
+    ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+        buffer.MarkRegionAsCpuModified(cpu_addr, size);
+    });
+}
+
+template <class P>
+void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
+    ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+        if (!buffer.HasCachedWrites()) {
+            cached_write_buffer_ids.push_back(buffer_id);
         }
-        if (map->is_written) {
-            UnmarkRegionAsWritten(map->start, map->end - 1);
+        buffer.CachedCpuWrite(cpu_addr, size);
+    });
+}
+
+template <class P> // Writes the download ranges of each buffer visited for the range back to guest CPU memory.
+void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
+    ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+        boost::container::small_vector<BufferCopy, 1> copies;
+        u64 total_size_bytes = 0; // sum of all range sizes; also the next packed destination offset
+        u64 largest_copy = 0; // sizes the immediate buffer in the non-mapped path
+        buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+            copies.push_back(BufferCopy{
+                .src_offset = range_offset,
+                .dst_offset = total_size_bytes, // ranges are packed back-to-back in the destination
+                .size = range_size,
+            });
+            total_size_bytes += range_size;
+            largest_copy = std::max(largest_copy, range_size);
+        });
+        if (total_size_bytes == 0) { // nothing to download from this buffer
+            return;
         }
-        const auto it = mapped_addresses.find(*map);
-        ASSERT(it != mapped_addresses.end());
-        mapped_addresses.erase(it);
-        mapped_addresses_allocator.Release(map);
-    }
-
-private:
-    MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
-        const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
-        if (overlaps.empty()) {
-            const VAddr cpu_addr_end = cpu_addr + size;
-            if (gpu_memory.IsGranularRange(gpu_addr, size)) {
-                u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
-                block->Upload(block->Offset(cpu_addr), size, host_ptr);
-            } else {
-                staging_buffer.resize(size);
-                gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
-                block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
+        MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+        if constexpr (USE_MEMORY_MAPS) { // staging-buffer path: copy on the runtime, then read the mapping
+            auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+            const u8* const mapped_memory = download_staging.mapped_span.data();
+            const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+            for (BufferCopy& copy : copies) {
+                // Modify copies to have the staging offset in mind
+                copy.dst_offset += download_staging.offset;
             }
-            return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
-        }
-
-        const VAddr cpu_addr_end = cpu_addr + size;
-        if (overlaps.size() == 1) {
-            MapInterval* const current_map = overlaps[0];
-            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
-                return current_map;
+            runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+            runtime.Finish(); // presumably blocks until the copy has completed - confirm against the runtime
+            for (const BufferCopy& copy : copies) {
+                const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                // Undo the modified offset
+                const u64 dst_offset = copy.dst_offset - download_staging.offset;
+                const u8* copy_mapped_memory = mapped_memory + dst_offset;
+                cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
+            }
+        } else { // immediate path: one range at a time through a scratch buffer
+            const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+            for (const BufferCopy& copy : copies) {
+                buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+                const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
             }
         }
-        VAddr new_start = cpu_addr;
-        VAddr new_end = cpu_addr_end;
-        bool write_inheritance = false;
-        bool modified_inheritance = false;
-        // Calculate new buffer parameters
-        for (MapInterval* overlap : overlaps) {
-            new_start = std::min(overlap->start, new_start);
-            new_end = std::max(overlap->end, new_end);
-            write_inheritance |= overlap->is_written;
-            modified_inheritance |= overlap->is_modified;
+    });
+}
+
+template <class P> // Records the guest uniform buffer binding for (stage, index); no host buffer is looked up here.
+void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+                                               u32 size) {
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) { // the GPU address has no CPU translation: store the null binding
+        uniform_buffers[stage][index] = NULL_BINDING;
+        return;
+    }
+    const Binding binding{
+        .cpu_addr = *cpu_addr,
+        .size = size,
+        .buffer_id = BufferId{}, // default id; UpdateUniformBuffers() resolves bindings whose id tests false
+    };
+    uniform_buffers[stage][index] = binding;
+}
+
+template <class P> // Runs the graphics binding update pass, repeating it while has_deleted_buffers gets set.
+void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
+    MICROPROFILE_SCOPE(GPU_PrepareBuffers);
+    do {
+        has_deleted_buffers = false; // presumably raised when a pass deletes a buffer - confirm where it is set
+        DoUpdateGraphicsBuffers(is_indexed);
+    } while (has_deleted_buffers);
+}
+
+template <class P> // Runs the compute binding update pass, repeating it while has_deleted_buffers gets set.
+void BufferCache<P>::UpdateComputeBuffers() {
+    MICROPROFILE_SCOPE(GPU_PrepareBuffers);
+    do {
+        has_deleted_buffers = false; // presumably raised when a pass deletes a buffer - confirm where it is set
+        DoUpdateComputeBuffers();
+    } while (has_deleted_buffers);
+}
+
+template <class P> // Binds index (or quad-array index), vertex and transform feedback buffers on the host.
+void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
+    MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+    if (is_indexed) {
+        BindHostIndexBuffer();
+    } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { // non-indexed draw on a limited backend
+        const auto& regs = maxwell3d.regs;
+        if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { // quads get a runtime index buffer
+            runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
         }
-        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
-        for (auto& overlap : overlaps) {
-            Unregister(overlap);
+    }
+    BindHostVertexBuffers();
+    BindHostTransformFeedbackBuffers();
+}
+
+template <class P> // Binds the uniform and then the storage buffers of one graphics stage on the host.
+void BufferCache<P>::BindHostStageBuffers(size_t stage) {
+    MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+    BindHostGraphicsUniformBuffers(stage);
+    BindHostGraphicsStorageBuffers(stage);
+}
+
+template <class P> // Binds the compute uniform and then the compute storage buffers on the host.
+void BufferCache<P>::BindHostComputeBuffers() {
+    MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+    BindHostComputeUniformBuffers();
+    BindHostComputeStorageBuffers();
+}
+
+template <class P> // Stores the enabled uniform buffer bitmask of a graphics stage.
+void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
+    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+        if (enabled_uniform_buffers[stage] != enabled) { // mask changed: mark every binding of the stage dirty
+            dirty_uniform_buffers[stage] = ~u32{0};
         }
-        UpdateBlock(block, new_start, new_end, overlaps);
-
-        const MapInterval new_map{new_start, new_end, new_gpu_addr};
-        MapInterval* const map = Register(new_map, write_inheritance);
-        if (!map) {
-            return nullptr;
+    }
+    enabled_uniform_buffers[stage] = enabled;
+}
+
+template <class P> // Stores the enabled compute uniform buffer bitmask; no dirty tracking is done here.
+void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
+    enabled_compute_uniform_buffers = enabled;
+}
+
+template <class P> // Clears the enabled and written storage buffer bitmasks of a graphics stage.
+void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
+    enabled_storage_buffers[stage] = 0;
+    written_storage_buffers[stage] = 0;
+}
+
+template <class P> // Enables storage buffer ssbo_index for a stage; its descriptor address is cbuf base + offset.
+void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
+                                               u32 cbuf_offset, bool is_written) {
+    enabled_storage_buffers[stage] |= 1U << ssbo_index;
+    written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; // bit is only ever set here
+
+    const auto& cbufs = maxwell3d.state.shader_stages[stage];
+    const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
+    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); // binding is built from that address
+}
+
+template <class P> // Clears the enabled and written compute storage buffer bitmasks.
+void BufferCache<P>::UnbindComputeStorageBuffers() {
+    enabled_compute_storage_buffers = 0;
+    written_compute_storage_buffers = 0;
+}
+
+template <class P> // Enables compute storage buffer ssbo_index; its descriptor address is cbuf base + offset.
+void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+                                              bool is_written) {
+    enabled_compute_storage_buffers |= 1U << ssbo_index;
+    written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; // bit is only ever set here
+
+    const auto& launch_desc = kepler_compute.launch_description;
+    ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); // the source cbuf must be enabled
+
+    const auto& cbufs = launch_desc.const_buffer_config;
+    const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
+    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
+}
+
+template <class P> // Flushes cached writes on every buffer queued by CachedWriteMemory(), then empties the queue.
+void BufferCache<P>::FlushCachedWrites() {
+    for (const BufferId buffer_id : cached_write_buffer_ids) {
+        slot_buffers[buffer_id].FlushCachedWrites();
+    }
+    cached_write_buffer_ids.clear();
+}
+
+template <class P> // True when at least one buffer id is waiting in uncommitted_downloads.
+bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
+    return !uncommitted_downloads.empty();
+}
+
+template <class P> // True when the batch PopAsyncFlushes() will consume next (the back) has downloads.
+bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+    return !committed_downloads.empty() && !committed_downloads.back().empty(); // push_front + pop_back
+}
+
+template <class P> // Moves the pending download ids into a new committed batch at the front of the queue.
+void BufferCache<P>::CommitAsyncFlushes() {
+    // This is intentionally passing the value by copy
+    committed_downloads.push_front(uncommitted_downloads);
+    uncommitted_downloads.clear(); // an empty batch is still committed when nothing was pending
+}
+
+template <class P> // Consumes the batch at the back of committed_downloads and writes its ranges to CPU memory.
+void BufferCache<P>::PopAsyncFlushes() {
+    if (committed_downloads.empty()) {
+        return;
+    }
+    auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
+    const std::span<const BufferId> download_ids = committed_downloads.back();
+    if (download_ids.empty()) { // empty batch: only the scope-exit pop runs
+        return;
+    }
+    MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
+    u64 total_size_bytes = 0; // sum of all range sizes; also the next packed destination offset
+    u64 largest_copy = 0; // sizes the immediate buffer in the non-mapped path
+    for (const BufferId buffer_id : download_ids) {
+        slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
+            downloads.push_back({
+                BufferCopy{
+                    .src_offset = range_offset,
+                    .dst_offset = total_size_bytes, // ranges of all buffers are packed back-to-back
+                    .size = range_size,
+                },
+                buffer_id,
+            });
+            total_size_bytes += range_size;
+            largest_copy = std::max(largest_copy, range_size);
+        });
+    }
+    if (downloads.empty()) { // no buffer reported a download range
+        return;
+    }
+    if constexpr (USE_MEMORY_MAPS) { // staging-buffer path: one runtime copy per range, then read the mapping
+        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+        for (auto& [copy, buffer_id] : downloads) {
+            // Have in mind the staging buffer offset for the copy
+            copy.dst_offset += download_staging.offset;
+            const std::array copies{copy};
+            runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
         }
-        if (modified_inheritance) {
-            map->MarkAsModified(true, GetModifiedTicks());
-            if (Settings::IsGPULevelHigh() &&
-                Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-                MarkForAsyncFlush(map);
-            }
+        runtime.Finish(); // presumably blocks until the copies have completed - confirm against the runtime
+        for (const auto [copy, buffer_id] : downloads) {
+            const Buffer& buffer = slot_buffers[buffer_id];
+            const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+            // Undo the modified offset
+            const u64 dst_offset = copy.dst_offset - download_staging.offset;
+            const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
+            cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+        }
+    } else { // immediate path: one range at a time through a scratch buffer
+        const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+        for (const auto [copy, buffer_id] : downloads) {
+            Buffer& buffer = slot_buffers[buffer_id];
+            buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+            const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+            cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
         }
-        return map;
     }
-
-    void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
-        const IntervalType base_interval{start, end};
-        IntervalSet interval_set{};
-        interval_set.add(base_interval);
-        for (auto& overlap : overlaps) {
-            const IntervalType subtract{overlap->start, overlap->end};
-            interval_set.subtract(subtract);
+}
+
+template <class P> // True if any buffer found through the page table reports the region as GPU-modified.
+bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+    const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
+    for (u64 page = addr >> PAGE_BITS; page < page_end;) { // page is advanced inside the body
+        const BufferId image_id = page_table[page]; // a buffer id despite the variable name
+        if (!image_id) { // no buffer registered on this page
+            ++page;
+            continue;
         }
-        for (auto& interval : interval_set) {
-            const std::size_t size = interval.upper() - interval.lower();
-            if (size == 0) {
-                continue;
-            }
-            staging_buffer.resize(size);
-            cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
-            block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
+        Buffer& buffer = slot_buffers[image_id];
+        if (buffer.IsRegionGpuModified(addr, size)) {
+            return true;
         }
+        const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+        page = Common::DivCeil(end_addr, PAGE_SIZE); // jump to the first page past this buffer
     }
-
-    VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
-        VectorMapInterval result;
-        if (size == 0) {
-            return result;
+    return false;
+}
+
+template <class P> // Synchronizes the resolved index buffer binding and binds it on the runtime.
+void BufferCache<P>::BindHostIndexBuffer() {
+    Buffer& buffer = slot_buffers[index_buffer.buffer_id];
+    const u32 offset = buffer.Offset(index_buffer.cpu_addr);
+    const u32 size = index_buffer.size;
+    SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); // presumably uploads pending CPU data - confirm
+    if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
+        runtime.BindIndexBuffer(buffer, offset, size);
+    } else { // limited backends also receive topology, format, first and count
+        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
+                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
+                                buffer, offset, size);
+    }
+}
+
+template <class P> // Synchronizes every vertex buffer slot and rebinds the slots whose dirty flag is set.
+void BufferCache<P>::BindHostVertexBuffers() {
+    auto& flags = maxwell3d.dirty.flags;
+    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+        const Binding& binding = vertex_buffers[index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); // runs even when the slot is not dirty
+        if (!flags[Dirty::VertexBuffer0 + index]) {
+            continue;
         }
+        flags[Dirty::VertexBuffer0 + index] = false; // the per-slot flag is consumed here
 
-        const VAddr addr_end = addr + size;
-        auto it = mapped_addresses.lower_bound(addr);
-        if (it != mapped_addresses.begin()) {
-            --it;
+        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
+    }
+}
+
+template <class P> // Binds every enabled uniform buffer of a graphics stage on the host.
+void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
+    u32 dirty = ~0U; // without persistent bindings every buffer counts as dirty
+    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+        dirty = std::exchange(dirty_uniform_buffers[stage], 0); // take the stage's dirty mask and reset it
+    }
+    u32 binding_index = 0;
+    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+        const bool needs_bind = ((dirty >> index) & 1) != 0;
+        BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
+        if constexpr (NEEDS_BIND_UNIFORM_INDEX) { // otherwise binding_index stays 0 for every buffer
+            ++binding_index;
         }
-        while (it != mapped_addresses.end() && it->start < addr_end) {
-            if (it->Overlaps(addr, addr_end)) {
-                result.push_back(&*it);
+    });
+}
+
+template <class P> // Binds one graphics uniform buffer via the fast, stream or cached path (chosen below).
+void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
+                                                   bool needs_bind) {
+    const Binding& binding = uniform_buffers[stage][index];
+    const VAddr cpu_addr = binding.cpu_addr;
+    const u32 size = binding.size;
+    Buffer& buffer = slot_buffers[binding.buffer_id];
+    if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { // small and CPU-only data
+        if constexpr (IS_OPENGL) {
+            if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia
+                if (!HasFastUniformBufferBound(stage, binding_index)) {
+                    // We only have to bind when the currently bound buffer is not the fast version
+                    fast_bound_uniform_buffers[stage] |= 1U << binding_index; // record it for later calls
+                    runtime.BindFastUniformBuffer(stage, binding_index, size);
+                }
+                const auto span = ImmediateBufferWithData(cpu_addr, size);
+                runtime.PushFastUniformBuffer(stage, binding_index, span);
+                return;
             }
-            ++it;
         }
-        return result;
-    }
+        fast_bound_uniform_buffers[stage] |= 1U << binding_index; // the stream path also counts as fast-bound
 
-    /// Returns a ticks counter used for tracking when cached objects were last modified
-    u64 GetModifiedTicks() {
-        return ++modified_ticks;
+        // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
+        const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
+        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); // fill the mapped span from guest memory
+        return;
     }
-
-    void FlushMap(MapInterval* map) {
-        const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
-        ASSERT_OR_EXECUTE(it != blocks.end(), return;);
-
-        std::shared_ptr<Buffer> block = it->second;
-
-        const std::size_t size = map->end - map->start;
-        staging_buffer.resize(size);
-        block->Download(block->Offset(map->start), size, staging_buffer.data());
-        cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
-        map->MarkAsModified(false, 0);
+    // Classic cached path
+    SynchronizeBuffer(buffer, cpu_addr, size);
+    if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
+        // Skip binding if it's not needed and if the bound buffer is not the fast version
+        // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+        return;
     }
+    fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); // the cached buffer replaces the fast one
 
-    template <typename Callable>
-    BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
-        AlignBuffer(alignment);
-        const std::size_t uploaded_offset = buffer_offset;
-        callable(buffer_ptr);
-
-        buffer_ptr += size;
-        buffer_offset += size;
-        return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
+    const u32 offset = buffer.Offset(cpu_addr);
+    if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+        runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
+    } else {
+        runtime.BindUniformBuffer(buffer, offset, size);
     }
+}
+
+template <class P> // Synchronizes and binds every enabled storage buffer of a graphics stage on the host.
+void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
+    u32 binding_index = 0; // only advanced when NEEDS_BIND_STORAGE_INDEX
+    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+        const Binding& binding = storage_buffers[stage][index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
+        if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+            runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
+            ++binding_index;
+        } else {
+            runtime.BindStorageBuffer(buffer, offset, size, is_written);
+        }
+    });
+}
 
-    void AlignBuffer(std::size_t alignment) {
-        // Align the offset, not the mapped pointer
-        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
-        buffer_ptr += offset_aligned - buffer_offset;
-        buffer_offset = offset_aligned;
+template <class P> // Synchronizes and binds all transform feedback buffer slots when feedback is enabled.
+void BufferCache<P>::BindHostTransformFeedbackBuffers() {
+    if (maxwell3d.regs.tfb_enabled == 0) { // transform feedback disabled: nothing to bind
+        return;
     }
+    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+        const Binding& binding = transform_feedback_buffers[index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
+    }
+}
 
-    std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
-        const std::size_t old_size = buffer->Size();
-        const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
-        const VAddr cpu_addr = buffer->CpuAddr();
-        std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
-        new_buffer->CopyFrom(*buffer, 0, 0, old_size);
-        QueueDestruction(std::move(buffer));
-
-        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
-            blocks.insert_or_assign(page_start, new_buffer);
+template <class P> // Synchronizes and binds every enabled compute uniform buffer on the host.
+void BufferCache<P>::BindHostComputeUniformBuffers() {
+    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+        // Mark all uniform buffers as dirty
+        dirty_uniform_buffers.fill(~u32{0}); // presumably compute binds disturb graphics bindings - confirm
+    }
+    u32 binding_index = 0; // only advanced when NEEDS_BIND_UNIFORM_INDEX
+    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+        const Binding& binding = compute_uniform_buffers[index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+            runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
+            ++binding_index;
+        } else {
+            runtime.BindUniformBuffer(buffer, offset, size);
         }
+    });
+}
+
+template <class P> // Synchronizes and binds every enabled compute storage buffer on the host.
+void BufferCache<P>::BindHostComputeStorageBuffers() {
+    u32 binding_index = 0; // only advanced when NEEDS_BIND_STORAGE_INDEX
+    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+        const Binding& binding = compute_storage_buffers[index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
+        if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+            runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
+            ++binding_index;
+        } else {
+            runtime.BindStorageBuffer(buffer, offset, size, is_written);
+        }
+    });
+}
 
-        return new_buffer;
+template <class P> // One graphics update pass: index, vertex, transform feedback, then per-stage buffers.
+void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
+    if (is_indexed) { // the index buffer is only resolved for indexed draws
+        UpdateIndexBuffer();
     }
+    UpdateVertexBuffers();
+    UpdateTransformFeedbackBuffers();
+    for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+        UpdateUniformBuffers(stage);
+        UpdateStorageBuffers(stage);
+    }
+}
+
+template <class P> // One compute update pass: uniform buffers first, then storage buffers.
+void BufferCache<P>::DoUpdateComputeBuffers() {
+    UpdateComputeUniformBuffers();
+    UpdateComputeStorageBuffers();
+}
+
+template <class P> // Resolves index_buffer from the index array registers when they may have changed.
+void BufferCache<P>::UpdateIndexBuffer() {
+    // We have to check for the dirty flags and index count
+    // The index count is currently changed without updating the dirty flags
+    const auto& index_array = maxwell3d.regs.index_array;
+    auto& flags = maxwell3d.dirty.flags;
+    if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { // NOTE(review): `first` unchecked
+        return;
+    }
+    flags[Dirty::IndexBuffer] = false;
+    last_index_count = index_array.count;
+
+    const GPUVAddr gpu_addr_begin = index_array.StartAddress();
+    const GPUVAddr gpu_addr_end = index_array.EndAddress();
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+    const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
+    const u32 size = std::min(address_size, draw_size); // the draw reads up to first + count, capped by the range
+    if (size == 0 || !cpu_addr) { // empty or untranslatable range: use the null binding
+        index_buffer = NULL_BINDING;
+        return;
+    }
+    index_buffer = Binding{
+        .cpu_addr = *cpu_addr,
+        .size = size,
+        .buffer_id = FindBuffer(*cpu_addr, size),
+    };
+}
 
-    std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
-                                        std::shared_ptr<Buffer> second) {
-        const std::size_t size_1 = first->Size();
-        const std::size_t size_2 = second->Size();
-        const VAddr first_addr = first->CpuAddr();
-        const VAddr second_addr = second->CpuAddr();
-        const VAddr new_addr = std::min(first_addr, second_addr);
-        const std::size_t new_size = size_1 + size_2;
-
-        std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
-        new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
-        new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
-        QueueDestruction(std::move(first));
-        QueueDestruction(std::move(second));
+template <class P> // Re-resolves all vertex buffer slots when the aggregate VertexBuffers flag is set.
+void BufferCache<P>::UpdateVertexBuffers() {
+    auto& flags = maxwell3d.dirty.flags;
+    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) { // same flag set as `flags` above
+        return;
+    }
+    flags[Dirty::VertexBuffers] = false; // only the aggregate flag is cleared here
 
-        const VAddr cpu_addr_end = new_addr + new_size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
-            blocks.insert_or_assign(page_start, new_buffer);
-        }
-        return new_buffer;
+    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+        UpdateVertexBuffer(index);
     }
+}
 
-    Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
-        std::shared_ptr<Buffer> found;
+template <class P> // Resolves one vertex buffer slot from the vertex array registers if its flag is set.
+void BufferCache<P>::UpdateVertexBuffer(u32 index) {
+    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) { // flag is read here, not cleared
+        return;
+    }
+    const auto& array = maxwell3d.regs.vertex_array[index];
+    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const GPUVAddr gpu_addr_begin = array.StartAddress();
+    const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1; // the limit address is treated as inclusive
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+    const u32 size = address_size; // TODO: Analyze stride and number of vertices
+    if (array.enable == 0 || size == 0 || !cpu_addr) { // disabled, empty or untranslatable: null binding
+        vertex_buffers[index] = NULL_BINDING;
+        return;
+    }
+    vertex_buffers[index] = Binding{
+        .cpu_addr = *cpu_addr,
+        .size = size,
+        .buffer_id = FindBuffer(*cpu_addr, size),
+    };
+}
+
+template <class P> // Resolves the host buffer of every enabled, still unresolved uniform binding of a stage.
+void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
+    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+        Binding& binding = uniform_buffers[stage][index];
+        if (binding.buffer_id) {
+            // Already updated
+            return;
+        }
+        // Mark as dirty
+        if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+            dirty_uniform_buffers[stage] |= 1U << index;
+        }
+        // Resolve buffer
+        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+    });
+}
+
+template <class P> // Resolves every enabled storage binding of a stage and marks the written ones.
+void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
+    const u32 written_mask = written_storage_buffers[stage];
+    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+        // Resolve buffer
+        Binding& binding = storage_buffers[stage][index];
+        const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); // resolved on every call
+        binding.buffer_id = buffer_id;
+        // Mark buffer as written if needed
+        if (((written_mask >> index) & 1) != 0) {
+            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+        }
+    });
+}
 
-        const VAddr cpu_addr_end = cpu_addr + size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
-            auto it = blocks.find(page_start);
-            if (it == blocks.end()) {
-                if (found) {
-                    found = EnlargeBlock(found);
-                    continue;
-                }
-                const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
-                found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
-                blocks.insert_or_assign(page_start, found);
-                continue;
-            }
-            if (!found) {
-                found = it->second;
-                continue;
-            }
-            if (found != it->second) {
-                found = MergeBlocks(std::move(found), it->second);
+template <class P> // Resolves all transform feedback buffer slots when feedback is enabled.
+void BufferCache<P>::UpdateTransformFeedbackBuffers() {
+    if (maxwell3d.regs.tfb_enabled == 0) { // transform feedback disabled: bindings are left untouched
+        return;
+    }
+    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+        UpdateTransformFeedbackBuffer(index);
+    }
+}
+
+template <class P> // Resolves one transform feedback slot from its registers and marks it GPU-written.
+void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
+    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
+    const u32 size = binding.buffer_size;
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) { // disabled, empty or untranslatable
+        transform_feedback_buffers[index] = NULL_BINDING;
+        return;
+    }
+    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
+    transform_feedback_buffers[index] = Binding{
+        .cpu_addr = *cpu_addr,
+        .size = size,
+        .buffer_id = buffer_id,
+    };
+    MarkWrittenBuffer(buffer_id, *cpu_addr, size); // always marked as written, no mask is consulted
+}
+
+template <class P> // Rebuilds every enabled compute uniform binding from the launch description.
+void BufferCache<P>::UpdateComputeUniformBuffers() {
+    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+        Binding& binding = compute_uniform_buffers[index];
+        binding = NULL_BINDING; // start from the null binding; address and size are filled in below if valid
+        const auto& launch_desc = kepler_compute.launch_description;
+        if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
+            const auto& cbuf = launch_desc.const_buffer_config[index];
+            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            if (cpu_addr) {
+                binding.cpu_addr = *cpu_addr;
+                binding.size = cbuf.size;
             }
         }
-        return found.get();
+        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // also runs for null-binding values
+    });
+}
+
+template <class P> // Resolves every enabled compute storage binding and marks the written ones.
+void BufferCache<P>::UpdateComputeStorageBuffers() {
+    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+        // Resolve buffer
+        Binding& binding = compute_storage_buffers[index];
+        const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); // resolved on every call
+        binding.buffer_id = buffer_id;
+        // Mark as written if needed
+        if (((written_compute_storage_buffers >> index) & 1) != 0) {
+            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+        }
+    });
+}
+
+template <class P> // Marks the region GPU-modified and, under high accuracy + async GPU, queues a download.
+void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
+    Buffer& buffer = slot_buffers[buffer_id];
+    buffer.MarkRegionAsGpuModified(cpu_addr, size);
+
+    const bool is_accuracy_high = Settings::IsGPULevelHigh();
+    const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+    if (!is_accuracy_high || !is_async) { // downloads are only queued when both settings are on
+        return;
+    }
+    if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
+        // Already inserted
+        return;
     }
+    uncommitted_downloads.push_back(buffer_id);
+}
 
-    void MarkRegionAsWritten(VAddr start, VAddr end) {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
-            if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
-                ++it->second;
-            }
+template <class P>
+BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { // Returns the id of a buffer for the range, creating one when no registered buffer fits
+    if (cpu_addr == 0) { // Address 0 is treated as "no buffer"
+        return NULL_BUFFER_ID;
+    }
+    const u64 page = cpu_addr >> PAGE_BITS; // Lookup is keyed on the page holding the first byte
+    const BufferId buffer_id = page_table[page];
+    if (!buffer_id) { // Nothing registered on that page
+        return CreateBuffer(cpu_addr, size);
+    }
+    const Buffer& buffer = slot_buffers[buffer_id];
+    if (buffer.IsInBounds(cpu_addr, size)) { // Registered buffer reports the whole range as in bounds
+        return buffer_id;
+    }
+    return CreateBuffer(cpu_addr, size); // Range not in bounds: CreateBuffer merges the overlapping buffers
+}
+
+template <class P>
+typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
+                                                                       u32 wanted_size) { // Collects registered buffers touching the range and computes the merged extent
+    static constexpr int STREAM_LEAP_THRESHOLD = 16;
+    std::vector<BufferId> overlap_ids;
+    VAddr begin = cpu_addr;
+    VAddr end = cpu_addr + wanted_size;
+    int stream_score = 0;
+    bool has_stream_leap = false;
+    for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) { // `end` can grow inside the body, extending the scan
+        const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
+        if (!overlap_id) {
+            continue;
+        }
+        Buffer& overlap = slot_buffers[overlap_id];
+        if (overlap.IsPicked()) { // Already collected through an earlier page
+            continue;
+        }
+        overlap_ids.push_back(overlap_id);
+        overlap.Pick();
+        const VAddr overlap_cpu_addr = overlap.CpuAddr();
+        if (overlap_cpu_addr < begin) {
+            cpu_addr = begin = overlap_cpu_addr; // Extend downwards and keep scanning from the new lower start
+        }
+        end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
+
+        stream_score += overlap.StreamScore();
+        if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
+            // When this memory region has been joined a bunch of times, we assume it's being used
+            // as a stream buffer. Increase the size to skip constantly recreating buffers.
+            has_stream_leap = true;
+            end += PAGE_SIZE * 256; // Guarded by has_stream_leap, so applied at most once per call
         }
     }
-
-    void UnmarkRegionAsWritten(VAddr start, VAddr end) {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
-            auto it = written_pages.find(page_start);
-            if (it != written_pages.end()) {
-                if (it->second > 1) {
-                    --it->second;
-                } else {
-                    written_pages.erase(it);
-                }
-            }
+    return OverlapResult{
+        .ids = std::move(overlap_ids),
+        .begin = begin,
+        .end = end,
+        .has_stream_leap = has_stream_leap,
+    };
+}
+
+template <class P>
+void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
+                                 bool accumulate_stream_score) { // Copies an overlap's download ranges into the new buffer, then deletes the overlap
+    Buffer& new_buffer = slot_buffers[new_buffer_id];
+    Buffer& overlap = slot_buffers[overlap_id];
+    if (accumulate_stream_score) {
+        new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); // Inherit the overlap's score plus one for this join
+    }
+    std::vector<BufferCopy> copies;
+    const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); // Where the overlap starts inside the new buffer
+    overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
+        copies.push_back(BufferCopy{
+            .src_offset = begin,
+            .dst_offset = dst_base_offset + begin,
+            .size = range_size,
+        });
+        new_buffer.UnmarkRegionAsCpuModified(begin, range_size); // NOTE(review): `begin` is used as a buffer offset above, but MarkWrittenBuffer/DeleteBuffer pass CPU addresses to these trackers - confirm
+        new_buffer.MarkRegionAsGpuModified(begin, range_size);
+    });
+    if (!copies.empty()) { // Skip the runtime call when there is nothing to copy
+        runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
+    }
+    ReplaceBufferDownloads(overlap_id, new_buffer_id); // Pending downloads now refer to the new buffer
+    DeleteBuffer(overlap_id);
+}
+
+template <class P>
+BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { // Creates and registers a buffer covering the range plus every overlapping buffer
+    const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
+    const u32 size = static_cast<u32>(overlap.end - overlap.begin); // Merged extent, narrowed to 32 bits
+    const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+    for (const BufferId overlap_id : overlap.ids) {
+        JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); // Stream score is not accumulated once a leap was applied
+    }
+    Register(new_buffer_id); // Registered only after all overlaps were joined and deleted
+    return new_buffer_id;
+}
+
+template <class P>
+void BufferCache<P>::Register(BufferId buffer_id) { // Points the buffer's pages in page_table at buffer_id
+    ChangeRegister<true>(buffer_id);
+}
+
+template <class P>
+void BufferCache<P>::Unregister(BufferId buffer_id) { // Resets the buffer's pages in page_table to an empty id
+    ChangeRegister<false>(buffer_id);
+}
+
+template <class P>
+template <bool insert>
+void BufferCache<P>::ChangeRegister(BufferId buffer_id) { // Shared body of Register (insert=true) and Unregister (insert=false)
+    const Buffer& buffer = slot_buffers[buffer_id];
+    const VAddr cpu_addr_begin = buffer.CpuAddr();
+    const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
+    const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
+    const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); // Exclusive end, rounded up to include a partial last page
+    for (u64 page = page_begin; page != page_end; ++page) {
+        if constexpr (insert) {
+            page_table[page] = buffer_id;
+        } else {
+            page_table[page] = BufferId{}; // Overwrites unconditionally, whatever id the page held
         }
     }
+}
 
-    bool IsRegionWritten(VAddr start, VAddr end) const {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
-            if (written_pages.contains(page_start)) {
-                return true;
+template <class P>
+void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { // Uploads pending ranges of the region, skipping buffers at address 0
+    if (buffer.CpuAddr() == 0) { // A buffer at CPU address 0 is never synchronized
+        return;
+    }
+    SynchronizeBufferImpl(buffer, cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { // Gathers the buffer's upload ranges and hands them to UploadMemory
+    boost::container::small_vector<BufferCopy, 4> copies;
+    u64 total_size_bytes = 0;
+    u64 largest_copy = 0;
+    buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+        copies.push_back(BufferCopy{
+            .src_offset = total_size_bytes, // Running total: sources are laid out back to back
+            .dst_offset = range_offset,
+            .size = range_size,
+        });
+        total_size_bytes += range_size;
+        largest_copy = std::max(largest_copy, range_size);
+    });
+    if (total_size_bytes == 0) { // No bytes to upload
+        return;
+    }
+    const std::span<BufferCopy> copies_span(copies.data(), copies.size());
+    UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
+}
+
+template <class P>
+void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+                                  std::span<BufferCopy> copies) { // Picks the upload path at compile time from USE_MEMORY_MAPS
+    if constexpr (USE_MEMORY_MAPS) {
+        MappedUploadMemory(buffer, total_size_bytes, copies); // Needs the total size to request staging memory
+    } else {
+        ImmediateUploadMemory(buffer, largest_copy, copies); // Needs only the largest copy to size its scratch buffer
+    }
+}
+
+template <class P>
+void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+                                           std::span<const BufferCopy> copies) { // Uploads each copy one at a time through Buffer::ImmediateUpload
+    std::span<u8> immediate_buffer;
+    for (const BufferCopy& copy : copies) {
+        std::span<const u8> upload_span;
+        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; // dst_offset is relative to the buffer's CPU address
+        if (IsRangeGranular(cpu_addr, copy.size)) {
+            upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); // Granular range: upload straight from guest memory
+        } else {
+            if (immediate_buffer.empty()) { // Scratch buffer is requested lazily, once, sized for the largest copy
+                immediate_buffer = ImmediateBuffer(largest_copy);
             }
+            cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+            upload_span = immediate_buffer.subspan(0, copy.size);
         }
-        return false;
+        buffer.ImmediateUpload(copy.dst_offset, upload_span);
     }
-
-    void QueueDestruction(std::shared_ptr<Buffer> buffer) {
-        buffer->SetEpoch(epoch);
-        pending_destruction.push(std::move(buffer));
+}
+
+template <class P>
+void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
+                                        std::span<BufferCopy> copies) { // Reads guest memory into a staging buffer and issues one CopyBuffer
+    auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
+    const std::span<u8> staging_pointer = upload_staging.mapped_span;
+    for (BufferCopy& copy : copies) {
+        u8* const src_pointer = staging_pointer.data() + copy.src_offset; // src_offset is still relative to the mapped span here
+        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+        cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
+
+        // Apply the staging offset
+        copy.src_offset += upload_staging.offset; // Mutates the caller's copies in place
     }
-
-    void MarkForAsyncFlush(MapInterval* map) {
-        if (!uncommitted_flushes) {
-            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
+    runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+}
+
+template <class P>
+void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { // Drops every reference to the buffer, unregisters it and defers its destruction
+    const auto scalar_replace = [buffer_id](Binding& binding) {
+        if (binding.buffer_id == buffer_id) {
+            binding.buffer_id = BufferId{}; // Only the id is cleared; cpu_addr and size are kept
+        }
+    };
+    const auto replace = [scalar_replace](std::span<Binding> bindings) {
+        std::ranges::for_each(bindings, scalar_replace);
+    };
+    scalar_replace(index_buffer);
+    replace(vertex_buffers);
+    std::ranges::for_each(uniform_buffers, replace); // Nested containers: replace runs on each inner range
+    std::ranges::for_each(storage_buffers, replace);
+    replace(transform_feedback_buffers);
+    replace(compute_uniform_buffers);
+    replace(compute_storage_buffers);
+    std::erase(cached_write_buffer_ids, buffer_id);
+
+    // Mark the whole buffer as CPU written to stop tracking CPU writes
+    Buffer& buffer = slot_buffers[buffer_id];
+    buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
+
+    Unregister(buffer_id);
+    delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); // The buffer object is moved into the ring rather than destroyed here
+
+    NotifyBufferDeletion();
+}
+
+template <class P>
+void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) { // Renames old id to new id in all download lists, keeping ids unique
+    const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
+        std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
+        if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
+            buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end()); // Keep the first occurrence, drop later duplicates
         }
-        uncommitted_flushes->insert(map);
+    };
+    replace(uncommitted_downloads);
+    std::ranges::for_each(committed_downloads, replace); // committed_downloads is a range of such vectors
+}
+
+template <class P>
+void BufferCache<P>::NotifyBufferDeletion() { // Invalidates cached binding state after a buffer was deleted
+    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+        dirty_uniform_buffers.fill(~u32{0}); // Set every bit of every element
     }
+    auto& flags = maxwell3d.dirty.flags;
+    flags[Dirty::IndexBuffer] = true;
+    flags[Dirty::VertexBuffers] = true;
+    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+        flags[Dirty::VertexBuffer0 + index] = true; // Per-buffer flags follow VertexBuffer0 contiguously
+    }
+    has_deleted_buffers = true;
+}
+
+template <class P>
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const { // Builds an unresolved binding from the descriptor stored at ssbo_addr
+    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr); // Descriptor layout as read here: u64 address at +0
+    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8); // ...and u32 size at +8
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr || size == 0) { // Untranslatable address or empty buffer => null binding
+        return NULL_BINDING;
+    }
+    // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range.
+    // It exists due to some games like Astral Chain operate out of bounds.
+    // Binding the whole map range would be technically correct, but games have large maps that make
+    // this approach unaffordable for now.
+    static constexpr u32 arbitrary_extra_bytes = 0xc000;
+    const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr)); // NOTE(review): narrowed to u32 - confirm maps cannot exceed 4 GiB
+    const Binding binding{
+        .cpu_addr = *cpu_addr,
+        .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end), // Padded size, clamped to the end of the map
+        .buffer_id = BufferId{}, // Left empty; this function does not look up a buffer
+    };
+    return binding;
+}
+
+template <class P>
+std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { // Returns the bytes at cpu_addr, directly when possible, else via a scratch copy
+    u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
+    if (IsRangeGranular(cpu_addr, size) ||
+        base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { // Start and end pointers are `size` apart
+        return std::span(base_pointer, size); // View into guest memory, no copy
+    } else {
+        const std::span<u8> span = ImmediateBuffer(size);
+        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); // Copy into the shared scratch buffer
+        return span;
+    }
+}
 
-    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::MemoryManager& gpu_memory;
-    Core::Memory::Memory& cpu_memory;
-    StreamBuffer& stream_buffer;
-
-    u8* buffer_ptr = nullptr;
-    u64 buffer_offset = 0;
-    u64 buffer_offset_base = 0;
-
-    MapIntervalAllocator mapped_addresses_allocator;
-    boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
-        mapped_addresses;
-
-    std::unordered_map<u64, u32> written_pages;
-    std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
-
-    std::queue<std::shared_ptr<Buffer>> pending_destruction;
-    u64 epoch = 0;
-    u64 modified_ticks = 0;
-
-    std::vector<u8> staging_buffer;
-
-    std::list<MapInterval*> marked_for_unregister;
-
-    std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
-    std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
-
-    std::recursive_mutex mutex;
-};
+template <class P>
+std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { // Returns a scratch span of wanted_capacity bytes backed by a grow-only allocation
+    if (wanted_capacity > immediate_buffer_capacity) { // Reallocate only when the request exceeds the recorded capacity
+        immediate_buffer_capacity = wanted_capacity;
+        immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity); // Fresh allocation; previous contents are not carried over
+    }
+    return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity); // The same storage is handed out on every call
+}
+
+template <class P>
+bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { // Tests the per-stage fast-bound bitmask for binding_index
+    if constexpr (IS_OPENGL) {
+        return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; // One bit per binding index
+    } else {
+        // Only OpenGL has fast uniform buffers
+        return false;
+    }
+}
 
 } // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
deleted file mode 100644
index 62587e18a..000000000
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <memory>
-
-#include "video_core/buffer_cache/map_interval.h"
-
-namespace VideoCommon {
-
-MapIntervalAllocator::MapIntervalAllocator() {
-    FillFreeList(first_chunk);
-}
-
-MapIntervalAllocator::~MapIntervalAllocator() = default;
-
-void MapIntervalAllocator::AllocateNewChunk() {
-    *new_chunk = std::make_unique<Chunk>();
-    FillFreeList(**new_chunk);
-    new_chunk = &(*new_chunk)->next;
-}
-
-void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
-    const std::size_t old_size = free_list.size();
-    free_list.resize(old_size + chunk.data.size());
-    std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
-                   [](MapInterval& interval) { return &interval; });
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
deleted file mode 100644
index ef974b08a..000000000
--- a/src/video_core/buffer_cache/map_interval.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <vector>
-
-#include <boost/intrusive/set_hook.hpp>
-
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-
-namespace VideoCommon {
-
-struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
-    MapInterval() = default;
-
-    /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
-
-    explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
-        : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
-
-    bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
-        return start <= other_start && other_end <= end;
-    }
-
-    bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
-        return start < other_end && other_start < end;
-    }
-
-    void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
-        is_modified = is_modified_;
-        ticks = ticks_;
-    }
-
-    boost::intrusive::set_member_hook<> member_hook_;
-    VAddr start = 0;
-    VAddr end = 0;
-    GPUVAddr gpu_addr = 0;
-    u64 ticks = 0;
-    bool is_written = false;
-    bool is_modified = false;
-    bool is_registered = false;
-    bool is_memory_marked = false;
-    bool is_sync_pending = false;
-};
-
-struct MapIntervalCompare {
-    constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
-        return lhs.start < rhs.start;
-    }
-};
-
-class MapIntervalAllocator {
-public:
-    MapIntervalAllocator();
-    ~MapIntervalAllocator();
-
-    MapInterval* Allocate() {
-        if (free_list.empty()) {
-            AllocateNewChunk();
-        }
-        MapInterval* const interval = free_list.back();
-        free_list.pop_back();
-        return interval;
-    }
-
-    void Release(MapInterval* interval) {
-        free_list.push_back(interval);
-    }
-
-private:
-    struct Chunk {
-        std::unique_ptr<Chunk> next;
-        std::array<MapInterval, 0x8000> data;
-    };
-
-    void AllocateNewChunk();
-
-    void FillFreeList(Chunk& chunk);
-
-    std::vector<MapInterval*> free_list;
-
-    Chunk first_chunk;
-
-    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 55e632346..2b7569335 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -110,12 +110,10 @@ void Vic::Execute() {
                                            converted_frame_buffer.get(), block_height, 0, 0);
 
             gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
-            gpu.Maxwell3D().OnMemoryWrite();
         } else {
             // send pitch linear frame
             gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
                                            linear_size);
-            gpu.Maxwell3D().OnMemoryWrite();
         }
         break;
     }
@@ -163,7 +161,6 @@ void Vic::Execute() {
         }
         gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
                                        chroma_buffer.size());
-        gpu.Maxwell3D().OnMemoryWrite();
         break;
     }
     default:
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index b1eaac00c..7149af290 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -12,13 +12,30 @@
 #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
 
 namespace VideoCommon::Dirty {
-
+namespace {
 using Tegra::Engines::Maxwell3D;
 
-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
+void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) { // Maps each vertex array's registers to its own dirty flag plus the shared VertexBuffers flag
+    static constexpr std::size_t num_array = 3; // Number of leading words of each vertex_array entry that are tracked
+    for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) { // Each FillBlock below must cover entry i only
+        const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
+        const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
+
+        FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
+        FillBlock(tables, limit_offset, NUM(vertex_array_limit[0]), VertexBuffer0 + i, VertexBuffers); // Per-entry length; the whole-array length spilled past the end of vertex_array_limit
+    }
+}
+
+void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) { // Maps the index_array registers to the IndexBuffer flag
+    FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer); // Only the first table is filled
+}
+
+void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) { // Maps the tic and tsc registers to the Descriptors flag
     FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
     FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
+}
 
+void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
     static constexpr std::size_t num_per_rt = NUM(rt[0]);
     static constexpr std::size_t begin = OFF(rt);
     static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
         FillBlock(table, OFF(zeta), NUM(zeta), flag);
     }
 }
+} // Anonymous namespace
+
+void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { // Public entry point: fills the common dirty-flag tables
+    SetupDirtyVertexBuffers(tables);
+    SetupIndexBuffer(tables);
+    SetupDirtyDescriptors(tables);
+    SetupDirtyRenderTargets(tables); // The helpers called here live in the anonymous namespace above
+}
 
 } // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 875527ddd..702688ace 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -30,6 +30,12 @@ enum : u8 {
     ColorBuffer7,
     ZetaBuffer,
 
+    VertexBuffers,
+    VertexBuffer0,
+    VertexBuffer31 = VertexBuffer0 + 31,
+
+    IndexBuffer,
+
     LastCommonEntry,
 };
 
@@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
     FillBlock(tables[1], begin, num, index_b);
 }
 
-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
+void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
 
 } // namespace VideoCommon::Dirty
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 2c8b20024..8b33c04ab 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() {
     MICROPROFILE_SCOPE(DispatchCalls);
 
     gpu.SyncGuestHost();
-    // On entering GPU code, assume all memory may be touched by the ARM core.
-    gpu.Maxwell3D().OnMemoryWrite();
 
     dma_pushbuffer_subindex = 0;
 
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index a01d334ad..0f640fdae 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -18,8 +18,8 @@ Fermi2D::Fermi2D() {
 
 Fermi2D::~Fermi2D() = default;
 
-void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { // Now takes a pointer instead of a reference
+    rasterizer = rasterizer_; // Stored as given; NOTE(review): null is now representable - confirm callers never pass it
 }
 
 void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0de3280a2..c808a577d 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -38,7 +38,7 @@ public:
     ~Fermi2D();
 
     /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
     /// Write the value to the register identified by method.
     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index ba387506e..a9b75091e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage
 
 KeplerCompute::~KeplerCompute() = default;
 
-void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { // Now takes a pointer instead of a reference
+    rasterizer = rasterizer_; // Stored as given; NOTE(review): null is now representable - confirm callers never pass it
 }
 
 void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
     case KEPLER_COMPUTE_REG_INDEX(data_upload): {
         upload_state.ProcessData(method_argument, is_last_call);
         if (is_last_call) {
-            system.GPU().Maxwell3D().OnMemoryWrite();
         }
         break;
     }
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 9f0a7b76d..7c40cba38 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -46,7 +46,7 @@ public:
     ~KeplerCompute();
 
     /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
     static constexpr std::size_t NumConstBuffers = 8;
 
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 9911140e9..560551157 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
     case KEPLERMEMORY_REG_INDEX(data): {
         upload_state.ProcessData(method_argument, is_last_call);
         if (is_last_call) {
-            system.GPU().Maxwell3D().OnMemoryWrite();
         }
         break;
     }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 116ad1722..75517a4f7 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
 
 Maxwell3D::~Maxwell3D() = default;
 
-void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { // Now takes a pointer instead of a reference
+    rasterizer = rasterizer_; // Stored as given; NOTE(review): null is now representable - confirm callers never pass it
 }
 
 void Maxwell3D::InitializeRegisterDefaults() {
@@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
     case MAXWELL3D_REG_INDEX(data_upload):
         upload_state.ProcessData(argument, is_last_call);
         if (is_last_call) {
-            OnMemoryWrite();
         }
         return;
     case MAXWELL3D_REG_INDEX(fragment_barrier):
@@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
     }
 }
 
-void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
+void Maxwell3D::ProcessCBBind(size_t stage_index) {
     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
-    auto& shader = state.shader_stages[stage_index];
-    auto& bind_data = regs.cb_bind[stage_index];
-
-    ASSERT(bind_data.index < Regs::MaxConstBuffers);
-    auto& buffer = shader.const_buffers[bind_data.index];
-
+    const auto& bind_data = regs.cb_bind[stage_index];
+    auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index]; // NOTE(review): the bounds ASSERT on bind_data.index was dropped - confirm the index cannot exceed const_buffers
     buffer.enabled = bind_data.valid.Value() != 0;
     buffer.address = regs.const_buffer.BufferAddress();
     buffer.size = regs.const_buffer.cb_size;
+
+    const bool is_enabled = bind_data.valid.Value() != 0; // Same condition as buffer.enabled above
+    const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0; // Disabled bindings are forwarded as address 0...
+    const u32 size = is_enabled ? regs.const_buffer.cb_size : 0; // ...and size 0, unlike the engine state, which always stores the register values
+    rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size); // Rasterizer is told on every bind write
 }
 
 void Maxwell3D::ProcessCBData(u32 value) {
@@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() {
 
     const u32 id = cb_data_state.id;
     memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
-    OnMemoryWrite();
 
     cb_data_state.id = null_cb_data;
     cb_data_state.current = null_cb_data;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 002d1b3f9..ffed42a29 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -55,7 +55,7 @@ public:
     ~Maxwell3D();
 
     /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
     /// Register structure of the Maxwell3D engine.
     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -1314,8 +1314,7 @@ public:
 
                     GPUVAddr LimitAddress() const {
                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
-                                                     limit_low) +
-                               1;
+                                                     limit_low);
                     }
                 } vertex_array_limit[NumVertexArrays];
 
@@ -1403,6 +1402,7 @@ public:
         };
 
         std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
+
         u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
     };
 
@@ -1452,11 +1452,6 @@ public:
         return *rasterizer;
     }
 
-    /// Notify a memory write has happened.
-    void OnMemoryWrite() {
-        dirty.flags |= dirty.on_write_stores;
-    }
-
     enum class MMEDrawMode : u32 {
         Undefined,
         Array,
@@ -1478,7 +1473,6 @@ public:
         using Tables = std::array<Table, 2>;
 
         Flags flags;
-        Flags on_write_stores;
         Tables tables{};
     } dirty;
 
@@ -1541,7 +1535,7 @@ private:
     void FinishCBData();
 
     /// Handles a write to the CB_BIND register.
-    void ProcessCBBind(std::size_t stage_index);
+    void ProcessCBBind(size_t stage_index);
 
     /// Handles a write to the VERTEX_END_GL register, triggering a draw.
     void DrawArrays();
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ba750748c..a2f19559f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -60,9 +60,6 @@ void MaxwellDMA::Launch() {
         return;
     }
 
-    // All copies here update the main memory, so mark all rasterizer states as invalid.
-    system.GPU().Maxwell3D().OnMemoryWrite();
-
     if (is_src_pitch && is_dst_pitch) {
         CopyPitchToPitch();
     } else {
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 3512283ff..f055b61e9 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -143,22 +143,26 @@ private:
     }
 
     bool ShouldWait() const {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; // Holds both cache mutexes for the whole query; query_cache is not locked here
         return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
                query_cache.ShouldWaitAsyncFlushes();
     }
 
     bool ShouldFlush() const {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; // Holds both cache mutexes for the whole query; query_cache is not locked here
         return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
                query_cache.HasUncommittedFlushes();
     }
 
     void PopAsyncFlushes() {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; // Holds both cache mutexes across all three pops; query_cache is not locked here
         texture_cache.PopAsyncFlushes();
         buffer_cache.PopAsyncFlushes();
         query_cache.PopAsyncFlushes();
     }
 
     void CommitAsyncFlushes() {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
         texture_cache.CommitAsyncFlushes();
         buffer_cache.CommitAsyncFlushes();
         query_cache.CommitAsyncFlushes();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 6ab06775f..2a9bd4121 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -44,8 +44,8 @@ GPU::~GPU() = default;
 
 void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
     renderer = std::move(renderer_);
+    rasterizer = renderer->ReadRasterizer();
 
-    VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
     memory_manager->BindRasterizer(rasterizer);
     maxwell_3d->BindRasterizer(rasterizer);
     fermi_2d->BindRasterizer(rasterizer);
@@ -171,7 +171,7 @@ void GPU::TickWork() {
         const std::size_t size = request.size;
         flush_requests.pop_front();
         flush_request_mutex.unlock();
-        renderer->Rasterizer().FlushRegion(addr, size);
+        rasterizer->FlushRegion(addr, size);
         current_flush_fence.store(fence);
         flush_request_mutex.lock();
     }
@@ -193,11 +193,11 @@ u64 GPU::GetTicks() const {
 }
 
 void GPU::FlushCommands() {
-    renderer->Rasterizer().FlushCommands();
+    rasterizer->FlushCommands();
 }
 
 void GPU::SyncGuestHost() {
-    renderer->Rasterizer().SyncGuestHost();
+    rasterizer->SyncGuestHost();
 }
 
 enum class GpuSemaphoreOperation {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b4ce6b154..b2ee45496 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -366,6 +366,7 @@ protected:
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
     std::unique_ptr<VideoCore::RendererBase> renderer;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
     const bool use_nvdec;
 
 private:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 7e490bcc3..50319f1d5 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
     }
 
     auto current_context = context.Acquire();
+    VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
 
     CommandDataContainer next;
     while (state.is_running) {
@@ -52,13 +53,13 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
         } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
         } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
-            renderer.Rasterizer().ReleaseFences();
+            rasterizer->ReleaseFences();
         } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
             system.GPU().TickWork();
         } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
-            renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
+            rasterizer->FlushRegion(flush->addr, flush->size);
         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
-            renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size);
+            rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
             return;
         } else {
@@ -84,6 +85,7 @@ ThreadManager::~ThreadManager() {
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                 Core::Frontend::GraphicsContext& context,
                                 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
+    rasterizer = renderer.ReadRasterizer();
     thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
                          std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
 }
@@ -129,12 +131,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
-    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
+    rasterizer->OnCPUWrite(addr, size);
 }
 
 void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
-    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
+    rasterizer->OnCPUWrite(addr, size);
 }
 
 void ThreadManager::WaitIdle() const {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 2775629e7..4cd951169 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -27,6 +27,7 @@ class System;
 } // namespace Core
 
 namespace VideoCore {
+class RasterizerInterface;
 class RendererBase;
 } // namespace VideoCore
 
@@ -151,11 +152,12 @@ private:
     /// Pushes a command to be executed by the GPU thread
     u64 PushCommand(CommandData&& command_data);
 
-    SynchState state;
     Core::System& system;
-    std::thread thread;
-    std::thread::id thread_id;
     const bool is_async;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+    SynchState state;
+    std::thread thread;
 };
 
 } // namespace VideoCommon::GPUThread
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 28f2b8614..970120acc 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -12,7 +12,6 @@ set(SHADER_FILES
     vulkan_blit_depth_stencil.frag
     vulkan_present.frag
     vulkan_present.vert
-    vulkan_quad_array.comp
     vulkan_quad_indexed.comp
     vulkan_uint8.comp
 )
diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp
deleted file mode 100644
index 212f4e998..000000000
--- a/src/video_core/host_shaders/vulkan_quad_array.comp
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#version 460 core
-
-layout (local_size_x = 1024) in;
-
-layout (std430, set = 0, binding = 0) buffer OutputBuffer {
-    uint output_indexes[];
-};
-
-layout (push_constant) uniform PushConstants {
-    uint first;
-};
-
-void main() {
-    uint primitive = gl_GlobalInvocationID.x;
-    if (primitive * 6 >= output_indexes.length()) {
-        return;
-    }
-
-    const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
-    for (uint vertex = 0; vertex < 6; ++vertex) {
-        uint index = first + primitive * 4 + quad_map[vertex];
-        output_indexes[primitive * 6 + vertex] = index;
-    }
-}
diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp
index ad74d7af9..872291670 100644
--- a/src/video_core/host_shaders/vulkan_uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
     uint16_t output_indexes[];
 };
 
+uint AssembleIndex(uint id) {
+    // Most primitive restart indices are 0xFF
+    // Hardcode the restart index to 0xFF for now, widening it to 0xFFFF on output
+    uint index = uint(input_indexes[id]);
+    return index == 0xFF ? 0xFFFF : index;
+}
+
 void main() {
     uint id = gl_GlobalInvocationID.x;
     if (id < input_indexes.length()) {
-        output_indexes[id] = uint16_t(input_indexes[id]);
+        output_indexes[id] = uint16_t(AssembleIndex(id));
     }
 }
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index c841f3cd7..44240a9c4 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_)
 
 MemoryManager::~MemoryManager() = default;
 
-void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
 }
 
 GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b468a67de..b3538d503 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -72,7 +72,7 @@ public:
     ~MemoryManager();
 
     /// Binds a renderer to the memory manager.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
 
@@ -157,6 +157,8 @@ private:
 
     using MapRange = std::pair<GPUVAddr, size_t>;
     std::vector<MapRange> map_ranges;
+
+    std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0cb0f387d..50491b758 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -7,6 +7,7 @@
 #include <atomic>
 #include <functional>
 #include <optional>
+#include <span>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
@@ -49,6 +50,10 @@ public:
     /// Records a GPU query and caches it
     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
 
+    /// Signal a uniform buffer binding
+    virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+                                           u32 size) = 0;
+
     /// Signal a GPU based semaphore as a fence
     virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
 
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 51dde8eb5..320ee8d30 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -37,15 +37,11 @@ public:
                           std::unique_ptr<Core::Frontend::GraphicsContext> context);
     virtual ~RendererBase();
 
-    /// Initialize the renderer
-    [[nodiscard]] virtual bool Init() = 0;
-
-    /// Shutdown the renderer
-    virtual void ShutDown() = 0;
-
     /// Finalize rendering the guest frame and draw into the presentation texture
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 
+    [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
+
     // Getter/setter functions:
     // ------------------------
 
@@ -57,14 +53,6 @@ public:
         return m_current_frame;
     }
 
-    [[nodiscard]] RasterizerInterface& Rasterizer() {
-        return *rasterizer;
-    }
-
-    [[nodiscard]] const RasterizerInterface& Rasterizer() const {
-        return *rasterizer;
-    }
-
     [[nodiscard]] Core::Frontend::GraphicsContext& Context() {
         return *context;
     }
@@ -98,7 +86,6 @@ public:
 
 protected:
     Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
-    std::unique_ptr<RasterizerInterface> rasterizer;
     std::unique_ptr<Core::Frontend::GraphicsContext> context;
     f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
     int m_current_frame = 0;  ///< Current frame, should be set by the renderer
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 5772cad87..6da3906a4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,98 +2,208 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <memory>
+#include <span>
 
-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/microprofile.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
 
 namespace OpenGL {
+namespace {
+struct BindlessSSBO {
+    GLuint64EXT address;
+    GLsizei length;
+    GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
+
+constexpr std::array PROGRAM_LUT{
+    GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+    GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+};
+} // Anonymous namespace
+
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+               VAddr cpu_addr_, u64 size_bytes_)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+    buffer.Create();
+    const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
+    glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
+    glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
+
+    if (runtime.has_unified_vertex_buffers) {
+        glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
+    }
+}
 
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
+    glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+                         static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+}
 
-MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
+void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
+    glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+                            static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+}
 
-Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
-    : BufferBlock{cpu_addr_, size_} {
-    gl_buffer.Create();
-    glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
-    if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
-        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
-        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+void Buffer::MakeResident(GLenum access) noexcept {
+    // Abuse GLenum's order to exit early
+    // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
+    if (access <= current_residency_access || buffer.handle == 0) {
+        return;
+    }
+    if (std::exchange(current_residency_access, access) != GL_NONE) {
+        // If the buffer is already resident, remove its residency before promoting it
+        glMakeNamedBufferNonResidentNV(buffer.handle);
     }
+    glMakeNamedBufferResidentNV(buffer.handle, access);
 }
 
-Buffer::~Buffer() = default;
-
-void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
-    glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
-                         static_cast<GLsizeiptr>(data_size), data);
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
+    : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+      use_assembly_shaders{device.UseAssemblyShaders()},
+      has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
+      stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
+    GLint gl_max_attributes;
+    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
+    max_attributes = static_cast<u32>(gl_max_attributes);
+    for (auto& stage_uniforms : fast_uniforms) {
+        for (OGLBuffer& buffer : stage_uniforms) {
+            buffer.Create();
+            glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
+        }
+    }
+    for (auto& stage_uniforms : copy_uniforms) {
+        for (OGLBuffer& buffer : stage_uniforms) {
+            buffer.Create();
+            glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+        }
+    }
+    for (OGLBuffer& buffer : copy_compute_uniforms) {
+        buffer.Create();
+        glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+    }
 }
 
-void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
-    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
-    const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
-    const GLintptr gl_offset = static_cast<GLintptr>(offset);
-    if (read_buffer.handle == 0) {
-        read_buffer.Create();
-        glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
-                          GL_STREAM_READ);
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+                                    std::span<const VideoCommon::BufferCopy> copies) {
+    for (const VideoCommon::BufferCopy& copy : copies) {
+        glCopyNamedBufferSubData(
+            src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
+            static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
     }
-    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
-    glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
-    glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
 }
 
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                      std::size_t copy_size) {
-    glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
-                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
+void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
+    if (has_unified_vertex_buffers) {
+        buffer.MakeResident(GL_READ_ONLY);
+        glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
+                               static_cast<GLsizeiptr>(size));
+    } else {
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
+        index_buffer_offset = offset;
+    }
 }
 
-OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                               Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                               const Device& device_, OGLStreamBuffer& stream_buffer_,
-                               StateTracker& state_tracker)
-    : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
-    if (!device.HasFastBufferSubData()) {
+void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
+                                          u32 stride) {
+    if (index >= max_attributes) {
         return;
     }
-
-    static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
-    glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
-    for (const GLuint cbuf : cbufs) {
-        glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
+    if (has_unified_vertex_buffers) {
+        buffer.MakeResident(GL_READ_ONLY);
+        glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
+        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
+                               buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
+    } else {
+        glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
+                           static_cast<GLsizei>(stride));
     }
 }
 
-OGLBufferCache::~OGLBufferCache() {
-    glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
+                                           u32 offset, u32 size) {
+    if (use_assembly_shaders) {
+        GLuint handle;
+        if (offset != 0) {
+            handle = copy_uniforms[stage][binding_index].handle;
+            glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+        } else {
+            handle = buffer.Handle();
+        }
+        glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
+                            static_cast<GLsizeiptr>(size));
+    } else {
+        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    }
 }
 
-std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<Buffer>(device, cpu_addr, size);
+void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
+                                                  u32 size) {
+    if (use_assembly_shaders) {
+        GLuint handle;
+        if (offset != 0) {
+            handle = copy_compute_uniforms[binding_index].handle;
+            glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+        } else {
+            handle = buffer.Handle();
+        }
+        glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
+                            static_cast<GLsizeiptr>(size));
+    } else {
+        glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    }
 }
 
-OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
-    return {0, 0, 0};
+void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
+                                           u32 offset, u32 size, bool is_written) {
+    if (use_assembly_shaders) {
+        const BindlessSSBO ssbo{
+            .address = buffer.HostGpuAddr() + offset,
+            .length = static_cast<GLsizei>(size),
+            .padding = 0,
+        };
+        buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
+        glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
+                                        reinterpret_cast<const GLuint*>(&ssbo));
+    } else {
+        const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    }
 }
 
-OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
-                                                             std::size_t size) {
-    DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
-    const GLuint cbuf = cbufs[cbuf_cursor++];
+void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
+                                                  u32 size, bool is_written) {
+    if (use_assembly_shaders) {
+        const BindlessSSBO ssbo{
+            .address = buffer.HostGpuAddr() + offset,
+            .length = static_cast<GLsizei>(size),
+            .padding = 0,
+        };
+        buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
+        glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
+                                        reinterpret_cast<const GLuint*>(&ssbo));
+    } else if (size == 0) {
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+    } else {
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    }
+}
 
-    glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
-    return {cbuf, 0, 0};
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
+                                                     u32 size) {
+    glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
+                      static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 17ee90316..d8b20a9af 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,79 +5,157 @@
 #pragma once
 
 #include <array>
-#include <memory>
+#include <span>
 
+#include "common/alignment.h"
 #include "common/common_types.h"
+#include "common/dynamic_library.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"
 
-namespace Core {
-class System;
-}
-
 namespace OpenGL {
 
-class Device;
-class OGLStreamBuffer;
-class RasterizerOpenGL;
-class StateTracker;
+class BufferCacheRuntime;
 
-class Buffer : public VideoCommon::BufferBlock {
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
 public:
-    explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
-    ~Buffer();
+    explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
+                    u64 size_bytes);
+    explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
 
-    void Upload(std::size_t offset, std::size_t data_size, const u8* data);
+    void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
 
-    void Download(std::size_t offset, std::size_t data_size, u8* data);
+    void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
 
-    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                  std::size_t copy_size);
+    void MakeResident(GLenum access) noexcept;
 
-    GLuint Handle() const noexcept {
-        return gl_buffer.handle;
+    [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
+        return address;
     }
 
-    u64 Address() const noexcept {
-        return gpu_address;
+    [[nodiscard]] GLuint Handle() const noexcept {
+        return buffer.handle;
     }
 
 private:
-    OGLBuffer gl_buffer;
-    OGLBuffer read_buffer;
-    u64 gpu_address = 0;
+    GLuint64EXT address = 0;
+    OGLBuffer buffer;
+    GLenum current_residency_access = GL_NONE;
 };
 
-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
-class OGLBufferCache final : public GenericBufferCache {
+class BufferCacheRuntime {
+    friend Buffer;
+
 public:
-    explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
-                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
-                            const Device& device, OGLStreamBuffer& stream_buffer,
-                            StateTracker& state_tracker);
-    ~OGLBufferCache();
+    static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
+
+    explicit BufferCacheRuntime(const Device& device_);
+
+    void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+                    std::span<const VideoCommon::BufferCopy> copies);
+
+    void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
+
+    void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
+
+    void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+    void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+    void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+                           bool is_written);
+
+    void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+                                  bool is_written);
+
+    void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+
+    void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
+        if (use_assembly_shaders) {
+            const GLuint handle = fast_uniforms[stage][binding_index].handle;
+            const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
+            glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
+        } else {
+            const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+            const GLuint binding = base_binding + binding_index;
+            glBindBufferRange(GL_UNIFORM_BUFFER, binding,
+                              fast_uniforms[stage][binding_index].handle, 0,
+                              static_cast<GLsizeiptr>(size));
+        }
+    }
 
-    BufferInfo GetEmptyBuffer(std::size_t) override;
+    void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
+        if (use_assembly_shaders) {
+            glProgramBufferParametersIuivNV(
+                PABO_LUT[stage], binding_index, 0,
+                static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
+                reinterpret_cast<const GLuint*>(data.data()));
+        } else {
+            glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
+                                 static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+        }
+    }
 
-    void Acquire() noexcept {
-        cbuf_cursor = 0;
+    std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
+        const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
+        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+        return mapped_span;
     }
 
-protected:
-    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+    [[nodiscard]] const GLvoid* IndexOffset() const noexcept {
+        return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
+    }
 
-    BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+    [[nodiscard]] bool HasFastBufferSubData() const noexcept {
+        return has_fast_buffer_sub_data;
+    }
 
 private:
-    static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
-                                             Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+    static constexpr std::array PABO_LUT{
+        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
+        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
+    };
 
     const Device& device;
 
-    std::size_t cbuf_cursor = 0;
-    std::array<GLuint, NUM_CBUFS> cbufs{};
+    bool has_fast_buffer_sub_data = false;
+    bool use_assembly_shaders = false;
+    bool has_unified_vertex_buffers = false;
+
+    u32 max_attributes = 0;
+
+    std::optional<StreamBuffer> stream_buffer;
+
+    std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
+               VideoCommon::NUM_STAGES>
+        fast_uniforms;
+    std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
+               VideoCommon::NUM_STAGES>
+        copy_uniforms;
+    std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
+
+    u32 index_buffer_offset = 0;
+};
+
+struct BufferCacheParams {
+    using Runtime = OpenGL::BufferCacheRuntime;
+    using Buffer = OpenGL::Buffer;
+
+    static constexpr bool IS_OPENGL = true;
+    static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
+    static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
+    static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
+    static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
+    static constexpr bool USE_MEMORY_MAPS = false;
 };
 
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 04c267ee4..48d5c4a5e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,7 @@
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 
 namespace OpenGL {
-
 namespace {
-
 // One uniform block is reserved for emulation purposes
 constexpr u32 ReservedUniformBlocks = 1;
 
@@ -197,11 +195,13 @@ bool IsASTCSupported() {
     const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
     return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
 }
-
 } // Anonymous namespace
 
-Device::Device()
-    : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
+Device::Device() {
+    if (!GLAD_GL_VERSION_4_6) {
+        LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
+        throw std::runtime_error{"Insufficient version"};
+    }
     const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
     const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
     const std::vector extensions = GetExtensions();
@@ -217,6 +217,9 @@ Device::Device()
             "Beta driver 443.24 is known to have issues. There might be performance issues.");
         disable_fast_buffer_sub_data = true;
     }
+
+    max_uniform_buffers = BuildMaxUniformBuffers();
+    base_bindings = BuildBaseBindings();
     uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
     shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 9141de635..ee053776d 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -10,11 +10,9 @@
 
 namespace OpenGL {
 
-static constexpr u32 EmulationUniformBlockBinding = 0;
-
-class Device final {
+class Device {
 public:
-    struct BaseBindings final {
+    struct BaseBindings {
         u32 uniform_buffer{};
         u32 shader_storage_buffer{};
         u32 sampler{};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 3e9c922f5..151290101 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
 
 FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
                                        Tegra::GPU& gpu_, TextureCache& texture_cache_,
-                                       OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
+                                       BufferCache& buffer_cache_, QueryCache& query_cache_) // all five are forwarded to the base
     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
 
 Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 30dbee613..e714aa115 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -32,14 +32,13 @@ private:
 };
 
 using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager =
-    VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
+using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
 
 class FenceManagerOpenGL final : public GenericFenceManager {
 public:
-    explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
-                                TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
-                                QueryCache& query_cache_);
+    explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+                                TextureCache& texture_cache, BufferCache& buffer_cache,
+                                QueryCache& query_cache);
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ea4ca9a82..418644108 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -38,34 +38,21 @@
 namespace OpenGL {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using GLvec4 = std::array<GLfloat, 4>;
 
 using Tegra::Engines::ShaderType;
 using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::SurfaceTarget;
 using VideoCore::Surface::SurfaceType;
 
-MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
-MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
+MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
 
 namespace {
 
-constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
-constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
-    NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
-constexpr size_t TOTAL_CONST_BUFFER_BYTES =
-    NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
-
 constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
 
 struct TextureHandle {
     constexpr TextureHandle(u32 data, bool via_header_index) {
@@ -101,20 +88,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
     return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
 }
 
-std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
-                               const ConstBufferEntry& entry) {
-    if (!entry.IsIndirect()) {
-        return entry.GetSize();
-    }
-    if (buffer.size > Maxwell::MaxConstBufferSize) {
-        LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
-                    Maxwell::MaxConstBufferSize);
-        return Maxwell::MaxConstBufferSize;
-    }
-
-    return buffer.size;
-}
-
 /// Translates hardware transform feedback indices
 /// @param location Hardware location
 /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -147,14 +120,6 @@ void oglEnable(GLenum cap, bool state) {
     (state ? glEnable : glDisable)(cap);
 }
 
-void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
-    if (num_ssbos == 0) {
-        return;
-    }
-    glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
-                                    reinterpret_cast<const GLuint*>(ssbos));
-}
-
 ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
     if (entry.is_buffer) {
         return ImageViewType::Buffer;
@@ -201,44 +166,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
     : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
       kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
       screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
-      stream_buffer(device, state_tracker),
       texture_cache_runtime(device, program_manager, state_tracker),
       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+      buffer_cache_runtime(device),
+      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
       shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
       query_cache(*this, maxwell3d, gpu_memory),
-      buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
       async_shaders(emu_window_) {
-    unified_uniform_buffer.Create();
-    glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
-
-    if (device.UseAssemblyShaders()) {
-        glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
-        for (const GLuint cbuf : staging_cbufs) {
-            glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
-                                 nullptr, 0);
-        }
-    }
     if (device.UseAsynchronousShaders()) {
         async_shaders.AllocateWorkers();
     }
 }
 
-RasterizerOpenGL::~RasterizerOpenGL() {
-    if (device.UseAssemblyShaders()) {
-        glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
-    }
-}
+RasterizerOpenGL::~RasterizerOpenGL() = default; // defaulted: no hand-written teardown in this definition
 
-void RasterizerOpenGL::SetupVertexFormat() {
+void RasterizerOpenGL::SyncVertexFormats() {
     auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::VertexFormats]) {
         return;
     }
     flags[Dirty::VertexFormats] = false;
 
-    MICROPROFILE_SCOPE(OpenGL_VAO);
-
     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
     // the first 16 vertex attributes always, as we don't know which ones are actually used until
     // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@@ -274,55 +223,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
     }
 }
 
-void RasterizerOpenGL::SetupVertexBuffer() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!flags[Dirty::VertexBuffers]) {
-        return;
-    }
-    flags[Dirty::VertexBuffers] = false;
-
-    MICROPROFILE_SCOPE(OpenGL_VB);
-
-    const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
-
-    // Upload all guest vertex arrays sequentially to our buffer
-    const auto& regs = maxwell3d.regs;
-    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
-        if (!flags[Dirty::VertexBuffer0 + index]) {
-            continue;
-        }
-        flags[Dirty::VertexBuffer0 + index] = false;
-
-        const auto& vertex_array = regs.vertex_array[index];
-        if (!vertex_array.IsEnabled()) {
-            continue;
-        }
-
-        const GPUVAddr start = vertex_array.StartAddress();
-        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-        ASSERT(end >= start);
-
-        const GLuint gl_index = static_cast<GLuint>(index);
-        const u64 size = end - start;
-        if (size == 0) {
-            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
-            if (use_unified_memory) {
-                glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
-            }
-            continue;
-        }
-        const auto info = buffer_cache.UploadMemory(start, size);
-        if (use_unified_memory) {
-            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
-            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
-                                   info.address + info.offset, size);
-        } else {
-            glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
-        }
-    }
-}
-
-void RasterizerOpenGL::SetupVertexInstances() {
+void RasterizerOpenGL::SyncVertexInstances() {
     auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::VertexInstances]) {
         return;
@@ -343,17 +244,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
     }
 }
 
-GLintptr RasterizerOpenGL::SetupIndexBuffer() {
-    MICROPROFILE_SCOPE(OpenGL_Index);
-    const auto& regs = maxwell3d.regs;
-    const std::size_t size = CalculateIndexBufferSize();
-    const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
-    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
-    return info.offset;
-}
-
-void RasterizerOpenGL::SetupShaders() {
-    MICROPROFILE_SCOPE(OpenGL_Shader);
+void RasterizerOpenGL::SetupShaders(bool is_indexed) {
     u32 clip_distances = 0;
 
     std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
@@ -410,11 +301,19 @@ void RasterizerOpenGL::SetupShaders() {
         const size_t stage = index == 0 ? 0 : index - 1;
         shaders[stage] = shader;
 
-        SetupDrawConstBuffers(stage, shader);
-        SetupDrawGlobalMemory(stage, shader);
         SetupDrawTextures(shader, stage);
         SetupDrawImages(shader, stage);
 
+        buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
+
+        buffer_cache.UnbindGraphicsStorageBuffers(stage);
+        u32 ssbo_index = 0;
+        for (const auto& buffer : shader->GetEntries().global_memory_entries) {
+            buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+                                                   buffer.cbuf_offset, buffer.is_written);
+            ++ssbo_index;
+        }
+
         // Workaround for Intel drivers.
         // When a clip distance is enabled but not set in the shader it crops parts of the screen
         // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -430,43 +329,26 @@ void RasterizerOpenGL::SetupShaders() {
     SyncClipEnabled(clip_distances);
     maxwell3d.dirty.flags[Dirty::Shaders] = false;
 
+    buffer_cache.UpdateGraphicsBuffers(is_indexed);
+
     const std::span indices_span(image_view_indices.data(), image_view_indices.size());
     texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
 
+    buffer_cache.BindHostGeometryBuffers(is_indexed);
+
     size_t image_view_index = 0;
     size_t texture_index = 0;
     size_t image_index = 0;
     for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
         const Shader* const shader = shaders[stage];
-        if (shader) {
-            const auto base = device.GetBaseBindings(stage);
-            BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
-                         texture_index, image_index);
-        }
-    }
-}
-
-std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
-    const auto& regs = maxwell3d.regs;
-
-    std::size_t size = 0;
-    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (!regs.vertex_array[index].IsEnabled())
+        if (!shader) {
             continue;
-
-        const GPUVAddr start = regs.vertex_array[index].StartAddress();
-        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-
-        size += end - start;
-        ASSERT(end >= start);
+        }
+        buffer_cache.BindHostStageBuffers(stage);
+        const auto& base = device.GetBaseBindings(stage);
+        BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
+                     texture_index, image_index);
     }
-
-    return size;
-}
-
-std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
-    return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
-           static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
 }
 
 void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
@@ -475,6 +357,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
 }
 
 void RasterizerOpenGL::Clear() {
+    MICROPROFILE_SCOPE(OpenGL_Clears);
     if (!maxwell3d.ShouldExecute()) {
         return;
     }
@@ -525,11 +408,9 @@ void RasterizerOpenGL::Clear() {
     }
     UNIMPLEMENTED_IF(regs.clear_flags.viewport);
 
-    {
-        auto lock = texture_cache.AcquireLock();
-        texture_cache.UpdateRenderTargets(true);
-        state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
-    }
+    std::scoped_lock lock{texture_cache.mutex};
+    texture_cache.UpdateRenderTargets(true);
+    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
 
     if (use_color) {
         glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -541,7 +422,6 @@ void RasterizerOpenGL::Clear() {
     } else if (use_stencil) {
         glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
     }
-
     ++num_queued_commands;
 }
 
@@ -550,75 +430,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 
     query_cache.UpdateCounters();
 
-    SyncViewport();
-    SyncRasterizeEnable();
-    SyncPolygonModes();
-    SyncColorMask();
-    SyncFragmentColorClampState();
-    SyncMultiSampleState();
-    SyncDepthTestState();
-    SyncDepthClamp();
-    SyncStencilTestState();
-    SyncBlendState();
-    SyncLogicOpState();
-    SyncCullMode();
-    SyncPrimitiveRestart();
-    SyncScissorTest();
-    SyncPointState();
-    SyncLineState();
-    SyncPolygonOffset();
-    SyncAlphaTest();
-    SyncFramebufferSRGB();
-
-    buffer_cache.Acquire();
-    current_cbuf = 0;
-
-    std::size_t buffer_size = CalculateVertexArraysSize();
-
-    // Add space for index buffer
-    if (is_indexed) {
-        buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
-    }
-
-    // Uniform space for the 5 shader stages
-    buffer_size =
-        Common::AlignUp<std::size_t>(buffer_size, 4) +
-        (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
-
-    // Add space for at least 18 constant buffers
-    buffer_size += Maxwell::MaxConstBuffers *
-                   (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-
-    // Prepare the vertex array.
-    buffer_cache.Map(buffer_size);
-
-    // Prepare vertex array format.
-    SetupVertexFormat();
-
-    // Upload vertex and index data.
-    SetupVertexBuffer();
-    SetupVertexInstances();
-    GLintptr index_buffer_offset = 0;
-    if (is_indexed) {
-        index_buffer_offset = SetupIndexBuffer();
-    }
-
-    // Setup emulation uniform buffer.
-    if (!device.UseAssemblyShaders()) {
-        MaxwellUniformData ubo;
-        ubo.SetFromRegs(maxwell3d);
-        const auto info =
-            buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
-        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
-                          static_cast<GLsizeiptr>(sizeof(ubo)));
-    }
+    SyncState();
 
     // Setup shaders and their used resources.
-    auto lock = texture_cache.AcquireLock();
-    SetupShaders();
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+    SetupShaders(is_indexed);
 
-    // Signal the buffer cache that we are not going to upload more things.
-    buffer_cache.Unmap();
     texture_cache.UpdateRenderTargets(false);
     state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
     program_manager.BindGraphicsPipeline();
@@ -632,7 +449,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     if (is_indexed) {
         const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
         const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
-        const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
+        const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
         const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
         if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
             glDrawElements(primitive_mode, num_vertices, format, offset);
@@ -672,22 +489,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 }
 
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
-    buffer_cache.Acquire();
-    current_cbuf = 0;
-
     Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
 
-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
     BindComputeTextures(kernel);
 
-    const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
-                               (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-    buffer_cache.Map(buffer_size);
-
-    SetupComputeConstBuffers(kernel);
-    SetupComputeGlobalMemory(kernel);
-
-    buffer_cache.Unmap();
+    const auto& entries = kernel->GetEntries();
+    buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+    buffer_cache.UnbindComputeStorageBuffers();
+    u32 ssbo_index = 0;
+    for (const auto& buffer : entries.global_memory_entries) {
+        buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+                                              buffer.is_written);
+        ++ssbo_index;
+    }
+    buffer_cache.UpdateComputeBuffers();
+    buffer_cache.BindHostComputeBuffers();
 
     const auto& launch_desc = kepler_compute.launch_description;
     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
@@ -703,6 +520,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
     query_cache.Query(gpu_addr, type, timestamp);
 }
 
+void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+                                                 u32 size) { // forwards the binding to buffer_cache
+    std::scoped_lock lock{buffer_cache.mutex}; // held until this function returns
+    buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); // arguments passed unchanged
+}
+
 void RasterizerOpenGL::FlushAll() {}
 
 void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@@ -711,19 +534,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
         return;
     }
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.DownloadMemory(addr, size);
     }
-    buffer_cache.FlushRegion(addr, size);
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.DownloadMemory(addr, size);
+    }
     query_cache.FlushRegion(addr, size);
 }
 
 bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; // both mutexes taken up front, before the branch
     if (!Settings::IsGPULevelHigh()) {
-        return buffer_cache.MustFlushRegion(addr, size);
+        return buffer_cache.IsRegionGpuModified(addr, size); // only the buffer cache is consulted on this path
     }
     return texture_cache.IsRegionGpuModified(addr, size) ||
-           buffer_cache.MustFlushRegion(addr, size);
+           buffer_cache.IsRegionGpuModified(addr, size); // skipped when the texture query is already true
 }
 
 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -732,11 +559,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
         return;
     }
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
     }
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.WriteMemory(addr, size);
+    }
     shader_cache.InvalidateRegion(addr, size);
-    buffer_cache.InvalidateRegion(addr, size);
     query_cache.InvalidateRegion(addr, size);
 }
 
@@ -745,26 +575,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }
+    shader_cache.OnCPUWrite(addr, size);
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
     }
-    shader_cache.OnCPUWrite(addr, size);
-    buffer_cache.OnCPUWrite(addr, size);
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.CachedWriteMemory(addr, size);
+    }
 }
 
 void RasterizerOpenGL::SyncGuestHost() {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    buffer_cache.SyncGuestHost();
     shader_cache.SyncGuestHost();
+    { // inner scope bounds the lifetime of the buffer cache lock
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.FlushCachedWrites(); // NOTE(review): presumably applies writes queued by CachedWriteMemory - confirm
+    }
 }
 
 void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex}; // released at the end of this inner scope
         texture_cache.UnmapMemory(addr, size);
     }
-    buffer_cache.OnCPUWrite(addr, size);
+    { // separate scope: the buffer cache is locked only after the texture lock is dropped
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.WriteMemory(addr, size); // the unmapped range is reported to the buffer cache as a write
+    }
     shader_cache.OnCPUWrite(addr, size);
 }
 
@@ -799,14 +638,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 }
 
 void RasterizerOpenGL::WaitForIdle() {
-    // Place a barrier on everything that is not framebuffer related.
-    // This is related to another flag that is not currently implemented.
-    glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
-                    GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
-                    GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
-                    GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
-                    GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
-                    GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
+    glMemoryBarrier(GL_ALL_BARRIER_BITS); // one barrier with every barrier bit set
 }
 
 void RasterizerOpenGL::FragmentBarrier() {
@@ -831,18 +663,21 @@ void RasterizerOpenGL::TickFrame() {
     num_queued_commands = 0;
 
     fence_manager.TickFrame();
-    buffer_cache.TickFrame();
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.TickFrame();
     }
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.TickFrame();
+    }
 }
 
 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
                                              const Tegra::Engines::Fermi2D::Surface& dst,
                                              const Tegra::Engines::Fermi2D::Config& copy_config) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{texture_cache.mutex}; // held across the blit, released on return
     texture_cache.BlitImage(dst, src, copy_config);
     return true;
 }
@@ -854,7 +689,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     }
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 
-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{texture_cache.mutex};
     ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
     if (!image_view) {
         return false;
@@ -921,166 +756,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
     }
 }
 
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
-    static constexpr std::array PARAMETER_LUT{
-        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
-        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
-        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
-    };
-    MICROPROFILE_SCOPE(OpenGL_UBO);
-    const auto& stages = maxwell3d.state.shader_stages;
-    const auto& shader_stage = stages[stage_index];
-    const auto& entries = shader->GetEntries();
-    const bool use_unified = entries.use_unified_uniforms;
-    const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
-
-    const auto base_bindings = device.GetBaseBindings(stage_index);
-    u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
-    for (const auto& entry : entries.const_buffers) {
-        const u32 index = entry.GetIndex();
-        const auto& buffer = shader_stage.const_buffers[index];
-        SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
-                         base_unified_offset + index * Maxwell::MaxConstBufferSize);
-        ++binding;
-    }
-    if (use_unified) {
-        const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
-                                           entries.global_memory_entries.size());
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
-                          base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
-    }
-}
-
-void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
-    MICROPROFILE_SCOPE(OpenGL_UBO);
-    const auto& launch_desc = kepler_compute.launch_description;
-    const auto& entries = kernel->GetEntries();
-    const bool use_unified = entries.use_unified_uniforms;
-
-    u32 binding = 0;
-    for (const auto& entry : entries.const_buffers) {
-        const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
-        const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
-        Tegra::Engines::ConstBufferInfo buffer;
-        buffer.address = config.Address();
-        buffer.size = config.size;
-        buffer.enabled = mask[entry.GetIndex()];
-        SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
-                         use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
-        ++binding;
-    }
-    if (use_unified) {
-        const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
-                          NUM_CONST_BUFFERS_BYTES_PER_STAGE);
-    }
-}
-
-void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
-                                        const Tegra::Engines::ConstBufferInfo& buffer,
-                                        const ConstBufferEntry& entry, bool use_unified,
-                                        std::size_t unified_offset) {
-    if (!buffer.enabled) {
-        // Set values to zero to unbind buffers
-        if (device.UseAssemblyShaders()) {
-            glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
-        } else {
-            glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
-        }
-        return;
-    }
-
-    // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
-    // UBO alignment requirements.
-    const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
-
-    const bool fast_upload = !use_unified && device.HasFastBufferSubData();
-
-    const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
-    const GPUVAddr gpu_addr = buffer.address;
-    auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
-
-    if (device.UseAssemblyShaders()) {
-        UNIMPLEMENTED_IF(use_unified);
-        if (info.offset != 0) {
-            const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
-            glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
-            info.handle = staging_cbuf;
-            info.offset = 0;
-        }
-        glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
-        return;
-    }
-
-    if (use_unified) {
-        glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
-                                 unified_offset, size);
-    } else {
-        glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
-    }
-}
-
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
-    static constexpr std::array TARGET_LUT = {
-        GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
-        GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
-    };
-    const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
-    const auto& entries{shader->GetEntries().global_memory_entries};
-
-    std::array<BindlessSSBO, 32> ssbos;
-    ASSERT(entries.size() < ssbos.size());
-
-    const bool assembly_shaders = device.UseAssemblyShaders();
-    u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
-    for (const auto& entry : entries) {
-        const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
-        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
-        const u32 size{gpu_memory.Read<u32>(addr + 8)};
-        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
-        ++binding;
-    }
-    if (assembly_shaders) {
-        UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
-    }
-}
-
-void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
-    const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
-    const auto& entries{kernel->GetEntries().global_memory_entries};
-
-    std::array<BindlessSSBO, 32> ssbos;
-    ASSERT(entries.size() < ssbos.size());
-
-    u32 binding = 0;
-    for (const auto& entry : entries) {
-        const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
-        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
-        const u32 size{gpu_memory.Read<u32>(addr + 8)};
-        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
-        ++binding;
-    }
-    if (device.UseAssemblyShaders()) {
-        UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
-    }
-}
-
-void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
-                                         GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
-    const size_t alignment{device.GetShaderStorageBufferAlignment()};
-    const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
-    if (device.UseAssemblyShaders()) {
-        *ssbo = BindlessSSBO{
-            .address = static_cast<GLuint64EXT>(info.address + info.offset),
-            .length = static_cast<GLsizei>(size),
-            .padding = 0,
-        };
-    } else {
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
-                          static_cast<GLsizeiptr>(size));
-    }
-}
-
 void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
     const bool via_header_index =
         maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
@@ -1128,6 +803,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
     }
 }
 
+void RasterizerOpenGL::SyncState() { // Runs each per-feature Sync* helper once, in this order
+    SyncViewport();
+    SyncRasterizeEnable();
+    SyncPolygonModes();
+    SyncColorMask();
+    SyncFragmentColorClampState();
+    SyncMultiSampleState();
+    SyncDepthTestState();
+    SyncDepthClamp();
+    SyncStencilTestState();
+    SyncBlendState();
+    SyncLogicOpState();
+    SyncCullMode();
+    SyncPrimitiveRestart();
+    SyncScissorTest();
+    SyncPointState();
+    SyncLineState();
+    SyncPolygonOffset();
+    SyncAlphaTest();
+    SyncFramebufferSRGB();
+    SyncVertexFormats();
+    SyncVertexInstances();
+}
+
 void RasterizerOpenGL::SyncViewport() {
     auto& flags = maxwell3d.dirty.flags;
     const auto& regs = maxwell3d.regs;
@@ -1163,9 +862,11 @@ void RasterizerOpenGL::SyncViewport() {
         if (regs.screen_y_control.y_negate != 0) {
             flip_y = !flip_y;
         }
-        glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
-                      regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
-                                                                       : GL_NEGATIVE_ONE_TO_ONE);
+        const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
+        const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+        const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
+        state_tracker.ClipControl(origin, depth);
+        state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
     }
 
     if (dirty_viewport) {
@@ -1649,36 +1350,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
     if (regs.tfb_enabled == 0) {
         return;
     }
-
     if (device.UseAssemblyShaders()) {
         SyncTransformFeedback();
     }
-
     UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
                      regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
                      regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
-    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
-        const auto& binding = regs.tfb_bindings[index];
-        if (!binding.buffer_enable) {
-            if (enabled_transform_feedback_buffers[index]) {
-                glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
-                                  0);
-            }
-            enabled_transform_feedback_buffers[index] = false;
-            continue;
-        }
-        enabled_transform_feedback_buffers[index] = true;
-
-        auto& tfb_buffer = transform_feedback_buffers[index];
-        tfb_buffer.Create();
-
-        const GLuint handle = tfb_buffer.handle;
-        const std::size_t size = binding.buffer_size;
-        glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
-        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
-                          static_cast<GLsizeiptr>(size));
-    }
+    UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
 
     // We may have to call BeginTransformFeedbackNV here since they seem to call different
     // implementations on Nvidia's driver (the pointer is different) but we are using
@@ -1692,23 +1370,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
     if (regs.tfb_enabled == 0) {
         return;
     }
-
     glEndTransformFeedback();
-
-    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
-        const auto& binding = regs.tfb_bindings[index];
-        if (!binding.buffer_enable) {
-            continue;
-        }
-        UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
-        const GLuint handle = transform_feedback_buffers[index].handle;
-        const GPUVAddr gpu_addr = binding.Address();
-        const std::size_t size = binding.buffer_size;
-        const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
-        glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
-                                 static_cast<GLsizeiptr>(size));
-    }
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 82e03e677..3745cf637 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,6 @@
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 #include "video_core/shader/async_shaders.h"
 #include "video_core/textures/texture.h"
@@ -72,6 +71,7 @@ public:
     void DispatchCompute(GPUVAddr code_addr) override;
     void ResetCounter(VideoCore::QueryType type) override;
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
     bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -119,27 +119,6 @@ private:
     void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
                       size_t& image_view_index, size_t& texture_index, size_t& image_index);
 
-    /// Configures the current constbuffers to use for the draw command.
-    void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
-
-    /// Configures the current constbuffers to use for the kernel invocation.
-    void SetupComputeConstBuffers(Shader* kernel);
-
-    /// Configures a constant buffer.
-    void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
-                          const ConstBufferEntry& entry, bool use_unified,
-                          std::size_t unified_offset);
-
-    /// Configures the current global memory entries to use for the draw command.
-    void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
-
-    /// Configures the current global memory entries to use for the kernel invocation.
-    void SetupComputeGlobalMemory(Shader* kernel);
-
-    /// Configures a global memory buffer.
-    void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
-                           size_t size, BindlessSSBO* ssbo);
-
     /// Configures the current textures to use for the draw command.
     void SetupDrawTextures(const Shader* shader, size_t stage_index);
 
@@ -152,6 +131,9 @@ private:
     /// Configures images in a compute shader.
     void SetupComputeImages(const Shader* shader);
 
+    /// Syncs host state to match the guest's by running each per-feature Sync* helper
+    void SyncState();
+
     /// Syncs the viewport and depth range to match the guest state
     void SyncViewport();
 
@@ -215,6 +197,12 @@ private:
     /// Syncs the framebuffer sRGB state to match the guest state
     void SyncFramebufferSRGB();
 
+    /// Syncs vertex formats to match the guest state
+    void SyncVertexFormats();
+
+    /// Syncs vertex instances to match the guest state
+    void SyncVertexInstances();
+
     /// Syncs transform feedback state to match guest state
     /// @note Only valid on assembly shaders
     void SyncTransformFeedback();
@@ -225,19 +213,7 @@ private:
     /// End a transform feedback
     void EndTransformFeedback();
 
-    std::size_t CalculateVertexArraysSize() const;
-
-    std::size_t CalculateIndexBufferSize() const;
-
-    /// Updates the current vertex format
-    void SetupVertexFormat();
-
-    void SetupVertexBuffer();
-    void SetupVertexInstances();
-
-    GLintptr SetupIndexBuffer();
-
-    void SetupShaders();
+    void SetupShaders(bool is_indexed);
 
     Tegra::GPU& gpu;
     Tegra::Engines::Maxwell3D& maxwell3d;
@@ -249,12 +225,12 @@ private:
     ProgramManager& program_manager;
     StateTracker& state_tracker;
 
-    OGLStreamBuffer stream_buffer;
     TextureCacheRuntime texture_cache_runtime;
     TextureCache texture_cache;
+    BufferCacheRuntime buffer_cache_runtime;
+    BufferCache buffer_cache;
     ShaderCacheOpenGL shader_cache;
     QueryCache query_cache;
-    OGLBufferCache buffer_cache;
     FenceManagerOpenGL fence_manager;
 
     VideoCommon::Shader::AsyncShaders async_shaders;
@@ -262,20 +238,8 @@ private:
     boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
     std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
     boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
-    std::array<GLuint, MAX_TEXTURES> texture_handles;
-    std::array<GLuint, MAX_IMAGES> image_handles;
-
-    std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
-        transform_feedback_buffers;
-    std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
-        enabled_transform_feedback_buffers;
-
-    static constexpr std::size_t NUM_CONSTANT_BUFFERS =
-        Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
-        Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
-    std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
-    std::size_t current_cbuf = 0;
-    OGLBuffer unified_uniform_buffer;
+    std::array<GLuint, MAX_TEXTURES> texture_handles{};
+    std::array<GLuint, MAX_IMAGES> image_handles{};
 
     /// Number of commands queued to the OpenGL driver. Resetted on flush.
     std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0e34a0f20..3428e5e21 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
     handle = 0;
 }
 
-void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
-    ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
-
-    glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
-}
-
 void OGLSync::Create() {
     if (handle != 0)
         return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f48398669..552d79db4 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -234,9 +234,6 @@ public:
     /// Deletes the internal OpenGL resource
     void Release();
 
-    // Converts the buffer into a stream copy buffer with a fixed size
-    void MakeStreamCopy(std::size_t buffer_size);
-
     GLuint handle = 0;
 };
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c35b71b6b..ac78d344c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
 constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
 constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
 
-constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
+constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
 #define ftou floatBitsToUint
 #define itof intBitsToFloat
 #define utof uintBitsToFloat
@@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
 
 const float fswzadd_modifiers_a[] = float[4](-1.0f,  1.0f, -1.0f,  0.0f );
 const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f,  1.0f, -1.0f );
-
-layout (std140, binding = {}) uniform vs_config {{
-    float y_direction;
-}};
 )";
 
 class ShaderWriter final {
@@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
     return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
 }
 
-bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
-    const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
-    // We waste one UBO for emulation
-    const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
-    return num_ubos > num_available_ubos;
-}
-
 struct GenericVaryingDescription {
     std::string name;
     u8 first_element = 0;
@@ -420,9 +409,8 @@ public:
     explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                             ShaderType stage_, std::string_view identifier_,
                             std::string_view suffix_)
-        : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
-          suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
-                                                       UseUnifiedUniforms(device_, ir_, stage_)} {
+        : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
+          identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
         if (stage != ShaderType::Compute) {
             transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
         }
@@ -516,7 +504,8 @@ private:
         if (!identifier.empty()) {
             code.AddLine("// {}", identifier);
         }
-        code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
+        const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
+        code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
         code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
         if (device.HasShaderBallot()) {
             code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -542,7 +531,7 @@ private:
 
         code.AddNewLine();
 
-        code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
+        code.AddLine(COMMON_DECLARATIONS);
     }
 
     void DeclareVertex() {
@@ -865,17 +854,6 @@ private:
     }
 
     void DeclareConstantBuffers() {
-        if (use_unified_uniforms) {
-            const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
-                                static_cast<u32>(ir.GetGlobalMemory().size());
-            code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
-                         binding);
-            code.AddLine("    uint cbufs[];");
-            code.AddLine("}};");
-            code.AddNewLine();
-            return;
-        }
-
         u32 binding = device.GetBaseBindings(stage).uniform_buffer;
         for (const auto& [index, info] : ir.GetConstantBuffers()) {
             const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
@@ -1081,29 +1059,17 @@ private:
 
         if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
             const Node offset = cbuf->GetOffset();
-            const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
 
             if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
                 // Direct access
                 const u32 offset_imm = immediate->GetValue();
                 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
-                if (use_unified_uniforms) {
-                    return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
-                            Type::Uint};
-                } else {
-                    return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
-                                        offset_imm / (4 * 4), (offset_imm / 4) % 4),
-                            Type::Uint};
-                }
-            }
-
-            // Indirect access
-            if (use_unified_uniforms) {
-                return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
-                                    Visit(offset).AsUint()),
+                return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+                                    offset_imm / (4 * 4), (offset_imm / 4) % 4),
                         Type::Uint};
             }
 
+            // Indirect access
             const std::string final_offset = code.GenerateTemporary();
             code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
 
@@ -2293,7 +2259,6 @@ private:
                 }
             }
         }
-
         if (header.ps.omap.depth) {
             // The depth output is always 2 registers after the last color output, and current_reg
             // already contains one past the last color register.
@@ -2337,7 +2302,8 @@ private:
     }
 
     Expression YNegate(Operation operation) {
-        return {"y_direction", Type::Float};
+        // Built-in uniform holding Y_NEGATE; StateTracker::SetYNegate writes -1.0 or 1.0 into it
+        return {"gl_FrontMaterial.ambient.a", Type::Float};
     }
 
     template <u32 element>
@@ -2787,7 +2753,6 @@ private:
     const std::string_view identifier;
     const std::string_view suffix;
     const Header header;
-    const bool use_unified_uniforms;
     std::unordered_map<u8, VaryingTFB> transform_feedback;
 
     ShaderWriter code;
@@ -3003,8 +2968,11 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
     for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
-        entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
+        // OR each bit into the mask; plain assignment kept only the last clip distance
+        entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
     }
+    for (const auto& buffer : entries.const_buffers) {
+        entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
+    }
     entries.shader_length = ir.GetLength();
-    entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
     return entries;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index be68994bb..0397a000c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -55,7 +55,7 @@ struct ShaderEntries {
     std::vector<ImageEntry> images;
     std::size_t shader_length{};
     u32 clip_distances{};
-    bool use_unified_uniforms{};
+    u32 enabled_uniform_buffers{};
 };
 
 ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 60e6fa39f..dbdf5230f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
     FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
 }
 
-void SetupDirtyVertexArrays(Tables& tables) {
-    static constexpr std::size_t num_array = 3;
+void SetupDirtyVertexInstances(Tables& tables) {
     static constexpr std::size_t instance_base_offset = 3;
     for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
         const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
-        const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
-
-        FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
-        FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
-
         const std::size_t instance_array_offset = array_offset + instance_base_offset;
         tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
         tables[1][instance_array_offset] = VertexInstances;
@@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
 StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
     auto& dirty = gpu.Maxwell3D().dirty;
     auto& tables = dirty.tables;
-    SetupDirtyRenderTargets(tables);
+    SetupDirtyFlags(tables);
     SetupDirtyColorMasks(tables);
     SetupDirtyViewports(tables);
     SetupDirtyScissors(tables);
-    SetupDirtyVertexArrays(tables);
+    SetupDirtyVertexInstances(tables);
     SetupDirtyVertexFormat(tables);
     SetupDirtyShaders(tables);
     SetupDirtyPolygonModes(tables);
@@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
     SetupDirtyClipControl(tables);
     SetupDirtyDepthClampEnabled(tables);
     SetupDirtyMisc(tables);
-
-    auto& store = dirty.on_write_stores;
-    store[VertexBuffers] = true;
-    for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
-        store[VertexBuffer0 + i] = true;
-    }
-}
-
-void StateTracker::InvalidateStreamBuffer() {
-    flags[Dirty::VertexBuffers] = true;
-    for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
-        flags[index] = true;
-    }
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 574615d3c..94c905116 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -28,10 +28,6 @@ enum : u8 {
     VertexFormat0,
     VertexFormat31 = VertexFormat0 + 31,
 
-    VertexBuffers,
-    VertexBuffer0,
-    VertexBuffer31 = VertexBuffer0 + 31,
-
     VertexInstances,
     VertexInstance0,
     VertexInstance31 = VertexInstance0 + 31,
@@ -92,8 +88,6 @@ class StateTracker {
 public:
     explicit StateTracker(Tegra::GPU& gpu);
 
-    void InvalidateStreamBuffer();
-
     void BindIndexBuffer(GLuint new_index_buffer) {
         if (index_buffer == new_index_buffer) {
             return;
@@ -110,13 +104,32 @@ public:
         glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
     }
 
+    void ClipControl(GLenum new_origin, GLenum new_depth) {
+        if (new_origin == origin && new_depth == depth) {
+            return; // Cached values already match; skip the redundant GL call
+        }
+        origin = new_origin;
+        depth = new_depth;
+        glClipControl(origin, depth);
+    }
+
+    void SetYNegate(bool new_y_negate) {
+        if (new_y_negate == y_negate) {
+            return; // Cached value already matches; skip the redundant GL call
+        }
+        // Y_NEGATE lives in gl_FrontMaterial.ambient.a: -1.0 when negated, 1.0 otherwise
+        y_negate = new_y_negate;
+        const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
+        glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
+    }
+
     void NotifyScreenDrawVertexArray() {
         flags[OpenGL::Dirty::VertexFormats] = true;
         flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
         flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
 
-        flags[OpenGL::Dirty::VertexBuffers] = true;
-        flags[OpenGL::Dirty::VertexBuffer0] = true;
+        flags[VideoCommon::Dirty::VertexBuffers] = true;
+        flags[VideoCommon::Dirty::VertexBuffer0] = true;
 
         flags[OpenGL::Dirty::VertexInstances] = true;
         flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
@@ -202,6 +215,9 @@ private:
 
     GLuint framebuffer = 0;
     GLuint index_buffer = 0;
+    GLenum origin = GL_LOWER_LEFT;
+    GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
+    bool y_negate = false;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e0819cdf2..77b3ee0fe 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -1,70 +1,76 @@
-// Copyright 2018 Citra Emulator Project
+// Copyright 2021 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <tuple>
-#include <vector>
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <span>
+
+#include <glad/glad.h>
 
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"
 
-MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
-                    MP_RGB(128, 128, 192));
-
 namespace OpenGL {
 
-OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
-    : state_tracker{state_tracker_} {
-    gl_buffer.Create();
-
-    static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
-    glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
-    mapped_ptr = static_cast<u8*>(
-        glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
-
-    if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
-        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
-        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+/// Creates the buffer storage, maps it persistently, and creates a fence for every region
+StreamBuffer::StreamBuffer() {
+    static constexpr GLbitfield flags =
+        GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+    buffer.Create();
+    glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
+    glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
+    mapped_pointer =
+        static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
+    for (OGLSync& sync : fences) {
+        sync.Create();
     }
 }
 
-OGLStreamBuffer::~OGLStreamBuffer() {
-    glUnmapNamedBuffer(gl_buffer.handle);
-    gl_buffer.Release();
-}
-
-std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
-    ASSERT(size <= BUFFER_SIZE);
-    ASSERT(alignment <= BUFFER_SIZE);
-    mapped_size = size;
-
-    if (alignment > 0) {
-        buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
+/// Reserves `size` bytes of the persistently mapped buffer, which is used as a ring.
+/// Regions the cursor has moved past get a fence created; regions the new range enters are
+/// waited on and their fences released. Requests that do not fit wrap around to offset 0.
+/// @param size Bytes to reserve; asserted to be smaller than REGION_SIZE.
+/// @return The writable span for the reservation and its offset inside the buffer.
+std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
+    ASSERT(size < REGION_SIZE);
+    // Create fences for every region the cursor fully crossed since the previous request
+    for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+         ++region) {
+        fences[region].Create();
     }
+    used_iterator = iterator;
 
-    if (buffer_pos + size > BUFFER_SIZE) {
-        MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
-        glInvalidateBufferData(gl_buffer.handle);
-        state_tracker.InvalidateStreamBuffer();
-
-        buffer_pos = 0;
+    // Wait on (and release) the fences of the regions this reservation newly reaches
+    for (size_t region = Region(free_iterator) + 1,
+                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+         region < region_end; ++region) {
+        glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+        fences[region].Release();
     }
-
-    return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
-}
-
-void OGLStreamBuffer::Unmap(GLsizeiptr size) {
-    ASSERT(size <= mapped_size);
-
-    if (size > 0) {
-        glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
+    if (iterator + size >= free_iterator) {
+        free_iterator = iterator + size;
     }
-
-    buffer_pos += size;
+    if (iterator + size > STREAM_BUFFER_SIZE) {
+        // No room left before the end: fence the tail, wrap to offset 0 and wait on the head
+        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+            fences[region].Create();
+        }
+        used_iterator = 0;
+        iterator = 0;
+        free_iterator = size;
+
+        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+            glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+            fences[region].Release();
+        }
+    }
+    // Hand out the range and leave the cursor aligned for the next request
+    const size_t offset = iterator;
+    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+    return {std::span(mapped_pointer + offset, size), offset};
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index dd9cf67eb..6dbb6bfba 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -1,9 +1,12 @@
-// Copyright 2018 Citra Emulator Project
+// Copyright 2021 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
 
+#include <array>
+#include <memory>
+#include <span>
 #include <utility>
 
 #include <glad/glad.h>
@@ -13,48 +16,35 @@
 
 namespace OpenGL {
 
-class Device;
-class StateTracker;
+class StreamBuffer {
+    static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024; ///< Total buffer size in bytes
+    static constexpr size_t NUM_SYNCS = 16; ///< Number of regions, each with its own fence
+    static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; ///< Bytes per region
+    static constexpr size_t MAX_ALIGNMENT = 256; ///< Request() aligns its cursor to this value
+    static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
+    static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
+    static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
 
-class OGLStreamBuffer : private NonCopyable {
 public:
-    explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
-    ~OGLStreamBuffer();
-
-    /*
-     * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
-     * and the optional alignment requirement.
-     * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
-     * The return values are the pointer to the new chunk, and the offset within the buffer.
-     * The actual used size must be specified on unmapping the chunk.
-     */
-    std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
-
-    void Unmap(GLsizeiptr size);
-
-    GLuint Handle() const {
-        return gl_buffer.handle;
-    }
+    explicit StreamBuffer(); ///< Creates, labels and persistently maps the GL buffer
 
-    u64 Address() const {
-        return gpu_address;
-    }
+    [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
 
-    GLsizeiptr Size() const noexcept {
-        return BUFFER_SIZE;
+    [[nodiscard]] GLuint Handle() const noexcept {
+        return buffer.handle; // GL name of the buffer object backing the stream
     }
 
 private:
-    static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
-
-    StateTracker& state_tracker;
-
-    OGLBuffer gl_buffer;
+    [[nodiscard]] static size_t Region(size_t offset) noexcept {
+        return offset / REGION_SIZE; // Index of the region containing this byte offset
+    }
 
-    GLuint64EXT gpu_address = 0;
-    GLintptr buffer_pos = 0;
-    GLsizeiptr mapped_size = 0;
-    u8* mapped_ptr = nullptr;
+    size_t iterator = 0;          ///< Offset where the next request starts
+    size_t used_iterator = 0;     ///< Offset returned by the most recent request
+    size_t free_iterator = 0;     ///< Furthest end offset requested in the current pass
+    u8* mapped_pointer = nullptr; ///< CPU pointer to the mapped buffer storage
+    OGLBuffer buffer;             ///< GL buffer object backing the stream
+    std::array<OGLSync, NUM_SYNCS> fences; ///< One sync object per region
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 546cb6d00..12434db67 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
 
 } // Anonymous namespace
 
-ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
-    : span(map, size), sync{sync_}, handle{handle_} {}
-
 ImageBufferMap::~ImageBufferMap() {
     if (sync) {
         sync->Create();
@@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
     glFinish();
 }
 
-ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
+ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
     return upload_buffers.RequestMap(size, true);
 }
 
-ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
+ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
     return download_buffers.RequestMap(size, false);
 }
 
@@ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
 }
 
 void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
-                                                size_t buffer_offset,
                                                 std::span<const SwizzleParameters> swizzles) {
     switch (image.info.type) {
     case ImageType::e2D:
-        return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
+        return util_shaders.BlockLinearUpload2D(image, map, swizzles);
     case ImageType::e3D:
-        return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
+        return util_shaders.BlockLinearUpload3D(image, map, swizzles);
     case ImageType::Linear:
-        return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
+        return util_shaders.PitchUpload(image, map, swizzles);
     default:
         UNREACHABLE();
         break;
@@ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
                                                                bool insert_fence) {
     const size_t index = RequestBuffer(requested_size);
     OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
-    return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
+    return ImageBufferMap{
+        .mapped_span = std::span(maps[index], requested_size),
+        .sync = sync,
+        .buffer = buffers[index].handle,
+    };
 }
 
 size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
@@ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
     }
 }
 
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const ImageBufferMap& map,
                          std::span<const VideoCommon::BufferImageCopy> copies) {
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
-    glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
+    glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
 
     glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
 
@@ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
             current_image_height = copy.buffer_image_height;
             glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
         }
-        CopyBufferToImage(copy, buffer_offset);
+        CopyBufferToImage(copy, map.offset);
     }
 }
 
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const ImageBufferMap& map,
                          std::span<const VideoCommon::BufferCopy> copies) {
     for (const VideoCommon::BufferCopy& copy : copies) {
-        glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
+        glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
                                  copy.dst_offset, copy.size);
     }
 }
 
-void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+void Image::DownloadMemory(ImageBufferMap& map,
                            std::span<const VideoCommon::BufferImageCopy> copies) {
     glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
 
-    glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
     glPixelStorei(GL_PACK_ALIGNMENT, 1);
 
     u32 current_row_length = std::numeric_limits<u32>::max();
@@ -759,7 +759,38 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
             current_image_height = copy.buffer_image_height;
             glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
         }
-        CopyImageToBuffer(copy, buffer_offset);
+        CopyImageToBuffer(copy, map.offset);
+    }
+}
+
+GLuint Image::StorageHandle() noexcept {
+    switch (info.format) {
+    case PixelFormat::A8B8G8R8_SRGB:
+    case PixelFormat::B8G8R8A8_SRGB:
+    case PixelFormat::BC1_RGBA_SRGB:
+    case PixelFormat::BC2_SRGB:
+    case PixelFormat::BC3_SRGB:
+    case PixelFormat::BC7_SRGB:
+    case PixelFormat::ASTC_2D_4X4_SRGB:
+    case PixelFormat::ASTC_2D_8X8_SRGB:
+    case PixelFormat::ASTC_2D_8X5_SRGB:
+    case PixelFormat::ASTC_2D_5X4_SRGB:
+    case PixelFormat::ASTC_2D_5X5_SRGB:
+    case PixelFormat::ASTC_2D_10X8_SRGB:
+    case PixelFormat::ASTC_2D_6X6_SRGB:
+    case PixelFormat::ASTC_2D_10X10_SRGB:
+    case PixelFormat::ASTC_2D_12X12_SRGB:
+    case PixelFormat::ASTC_2D_8X6_SRGB:
+    case PixelFormat::ASTC_2D_6X5_SRGB:
+        if (store_view.handle != 0) {
+            return store_view.handle;
+        }
+        store_view.Create();
+        glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
+                      info.resources.levels, 0, info.resources.layers);
+        return store_view.handle;
+    default:
+        return texture.handle;
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 15b7c3676..a6172f009 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -31,23 +31,13 @@ using VideoCommon::NUM_RT;
 using VideoCommon::Offset2D;
 using VideoCommon::RenderTargets;
 
-class ImageBufferMap {
-public:
-    explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
+struct ImageBufferMap {
     ~ImageBufferMap();
 
-    GLuint Handle() const noexcept {
-        return handle;
-    }
-
-    std::span<u8> Span() const noexcept {
-        return span;
-    }
-
-private:
-    std::span<u8> span;
+    std::span<u8> mapped_span;
+    size_t offset = 0;
     OGLSync* sync;
-    GLuint handle;
+    GLuint buffer;
 };
 
 struct FormatProperties {
@@ -69,9 +59,9 @@ public:
 
     void Finish();
 
-    ImageBufferMap MapUploadBuffer(size_t size);
+    ImageBufferMap UploadStagingBuffer(size_t size);
 
-    ImageBufferMap MapDownloadBuffer(size_t size);
+    ImageBufferMap DownloadStagingBuffer(size_t size);
 
     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
 
@@ -89,7 +79,7 @@ public:
                          Tegra::Engines::Fermi2D::Filter filter,
                          Tegra::Engines::Fermi2D::Operation operation);
 
-    void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+    void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
                                std::span<const VideoCommon::SwizzleParameters> swizzles);
 
     void InsertUploadMemoryBarrier();
@@ -148,14 +138,14 @@ public:
     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                    VAddr cpu_addr);
 
-    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+    void UploadMemory(const ImageBufferMap& map,
                       std::span<const VideoCommon::BufferImageCopy> copies);
 
-    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
-                      std::span<const VideoCommon::BufferCopy> copies);
+    void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
+
+    void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
 
-    void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
-                        std::span<const VideoCommon::BufferImageCopy> copies);
+    GLuint StorageHandle() noexcept;
 
     GLuint Handle() const noexcept {
         return texture.handle;
@@ -167,8 +157,8 @@ private:
     void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
 
     OGLTexture texture;
-    OGLTextureView store_view;
     OGLBuffer buffer;
+    OGLTextureView store_view;
     GLenum gl_internal_format = GL_NONE;
     GLenum gl_format = GL_NONE;
     GLenum gl_type = GL_NONE;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index cbccfdeb4..f7ad8f370 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -4,23 +4,10 @@
 
 #pragma once
 
-#include <array>
 #include <glad/glad.h>
-#include "common/common_types.h"
-#include "common/logging/log.h"
 #include "video_core/engines/maxwell_3d.h"
 
-namespace OpenGL {
-
-using GLvec2 = std::array<GLfloat, 2>;
-using GLvec3 = std::array<GLfloat, 3>;
-using GLvec4 = std::array<GLfloat, 4>;
-
-using GLuvec2 = std::array<GLuint, 2>;
-using GLuvec3 = std::array<GLuint, 3>;
-using GLuvec4 = std::array<GLuint, 4>;
-
-namespace MaxwellToGL {
+namespace OpenGL::MaxwellToGL {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
@@ -317,26 +304,6 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
     return GL_ZERO;
 }
 
-inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
-    switch (source) {
-    case Tegra::Texture::SwizzleSource::Zero:
-        return GL_ZERO;
-    case Tegra::Texture::SwizzleSource::R:
-        return GL_RED;
-    case Tegra::Texture::SwizzleSource::G:
-        return GL_GREEN;
-    case Tegra::Texture::SwizzleSource::B:
-        return GL_BLUE;
-    case Tegra::Texture::SwizzleSource::A:
-        return GL_ALPHA;
-    case Tegra::Texture::SwizzleSource::OneInt:
-    case Tegra::Texture::SwizzleSource::OneFloat:
-        return GL_ONE;
-    }
-    UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source);
-    return GL_ZERO;
-}
-
 inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
     switch (comparison) {
     case Maxwell::ComparisonOp::Never:
@@ -493,5 +460,4 @@ inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
     return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
 }
 
-} // namespace MaxwellToGL
-} // namespace OpenGL
+} // namespace OpenGL::MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 21159e498..9d2acd4d9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -29,9 +29,7 @@
 #include "video_core/textures/decoders.h"
 
 namespace OpenGL {
-
 namespace {
-
 constexpr GLint PositionLocation = 0;
 constexpr GLint TexCoordLocation = 1;
 constexpr GLint ModelViewMatrixLocation = 0;
@@ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
         break;
     }
 }
-
 } // Anonymous namespace
 
 RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
@@ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
                                std::unique_ptr<Core::Frontend::GraphicsContext> context_)
     : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
-      emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
+      emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
+      program_manager{device},
+      rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
+    if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
+        glEnable(GL_DEBUG_OUTPUT);
+        glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
+        glDebugMessageCallback(DebugHandler, nullptr);
+    }
+    AddTelemetryFields();
+    InitOpenGLObjects();
+}
 
 RendererOpenGL::~RendererOpenGL() = default;
 
@@ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 
     ++m_current_frame;
 
-    rasterizer->TickFrame();
+    rasterizer.TickFrame();
 
     context->SwapBuffers();
     render_window.OnFrameDisplayed();
@@ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
     framebuffer_crop_rect = framebuffer.crop_rect;
 
     const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
-    if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+    if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
         return;
     }
 
@@ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() {
     // Enable unified vertex attributes and query vertex buffer address when the driver supports it
     if (device.HasVertexBufferUnifiedMemory()) {
         glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+        glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
 
         glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
         glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
@@ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() {
     telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
 }
 
-void RendererOpenGL::CreateRasterizer() {
-    if (rasterizer) {
-        return;
-    }
-    rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
-                                                    screen_info, program_manager, state_tracker);
-}
-
 void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
                                                  const Tegra::FramebufferConfig& framebuffer) {
     texture.width = framebuffer.width;
@@ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
 
     program_manager.BindHostPipeline(pipeline.handle);
 
+    state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
     glEnable(GL_CULL_FACE);
     if (screen_info.display_srgb) {
         glEnable(GL_FRAMEBUFFER_SRGB);
@@ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     glCullFace(GL_BACK);
     glFrontFace(GL_CW);
     glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
-    glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
     glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
                        static_cast<GLfloat>(layout.height));
     glDepthRangeIndexed(0, 0.0, 0.0);
@@ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() {
     renderer_settings.screenshot_requested = false;
 }
 
-bool RendererOpenGL::Init() {
-    if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
-        glEnable(GL_DEBUG_OUTPUT);
-        glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
-        glDebugMessageCallback(DebugHandler, nullptr);
-    }
-
-    AddTelemetryFields();
-
-    if (!GLAD_GL_VERSION_4_6) {
-        return false;
-    }
-
-    InitOpenGLObjects();
-    CreateRasterizer();
-
-    return true;
-}
-
-void RendererOpenGL::ShutDown() {}
-
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 44e109794..cc19a110f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,6 +10,7 @@
 #include "common/math_util.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -63,18 +64,18 @@ public:
                             std::unique_ptr<Core::Frontend::GraphicsContext> context_);
     ~RendererOpenGL() override;
 
-    bool Init() override;
-    void ShutDown() override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
 
+    VideoCore::RasterizerInterface* ReadRasterizer() override {
+        return &rasterizer;
+    }
+
 private:
     /// Initializes the OpenGL state and creates persistent objects.
     void InitOpenGLObjects();
 
     void AddTelemetryFields();
 
-    void CreateRasterizer();
-
     void ConfigureFramebufferTexture(TextureInfo& texture,
                                      const Tegra::FramebufferConfig& framebuffer);
 
@@ -98,8 +99,10 @@ private:
     Core::Memory::Memory& cpu_memory;
     Tegra::GPU& gpu;
 
-    const Device device;
-    StateTracker state_tracker{gpu};
+    Device device;
+    StateTracker state_tracker;
+    ProgramManager program_manager;
+    RasterizerOpenGL rasterizer;
 
     // OpenGL object IDs
     OGLSampler present_sampler;
@@ -115,9 +118,6 @@ private:
     /// Display information for Switch screen
     ScreenInfo screen_info;
 
-    /// Global dummy shader pipeline
-    ProgramManager program_manager;
-
     /// OpenGL framebuffer data
     std::vector<u8> gl_framebuffer_data;
 
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index eb849cbf2..31ec68505 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
 
 UtilShaders::~UtilShaders() = default;
 
-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
                                       std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
     static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
 
     program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
-    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
 
     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
     for (const SwizzleParameters& swizzle : swizzles) {
         const Extent3D num_tiles = swizzle.num_tiles;
-        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+        const size_t input_offset = swizzle.buffer_offset + map.offset;
 
         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -91,16 +91,16 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
         glUniform1ui(5, params.x_shift);
         glUniform1ui(6, params.block_height);
         glUniform1ui(7, params.block_height_mask);
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
-                          input_offset, image.guest_size_bytes - swizzle.buffer_offset);
-        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+                          image.guest_size_bytes - swizzle.buffer_offset);
+        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
                            GL_WRITE_ONLY, store_format);
         glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
     }
     program_manager.RestoreGuestCompute();
 }
 
-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
                                       std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
 
@@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
     static constexpr GLuint BINDING_INPUT_BUFFER = 1;
     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
 
-    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
     program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
 
     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
     for (const SwizzleParameters& swizzle : swizzles) {
         const Extent3D num_tiles = swizzle.num_tiles;
-        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+        const size_t input_offset = swizzle.buffer_offset + map.offset;
 
         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -132,16 +132,16 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
         glUniform1ui(7, params.block_height_mask);
         glUniform1ui(8, params.block_depth);
         glUniform1ui(9, params.block_depth_mask);
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
-                          input_offset, image.guest_size_bytes - swizzle.buffer_offset);
-        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+                          image.guest_size_bytes - swizzle.buffer_offset);
+        glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
                            GL_WRITE_ONLY, store_format);
         glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
     }
     program_manager.RestoreGuestCompute();
 }
 
-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
                               std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
     static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -159,21 +159,22 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
                          "Non-power of two images are not implemented");
 
     program_manager.BindHostCompute(pitch_unswizzle_program.handle);
-    glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
     glUniform2ui(LOC_ORIGIN, 0, 0);
     glUniform2i(LOC_DESTINATION, 0, 0);
     glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
     glUniform1ui(LOC_PITCH, pitch);
-    glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
+    glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
+                       format);
     for (const SwizzleParameters& swizzle : swizzles) {
         const Extent3D num_tiles = swizzle.num_tiles;
-        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+        const size_t input_offset = swizzle.buffer_offset + map.offset;
 
         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
 
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
-                          input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+                          image.guest_size_bytes - swizzle.buffer_offset);
         glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
     }
     program_manager.RestoreGuestCompute();
@@ -195,9 +196,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
 
         glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
         glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
-        glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
-                           GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
-        glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
+        glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
+                           copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+        glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
                            copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
         glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
     }
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 359997255..7b1d16b09 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -15,21 +15,22 @@
 namespace OpenGL {
 
 class Image;
-class ImageBufferMap;
 class ProgramManager;
 
+struct ImageBufferMap;
+
 class UtilShaders {
 public:
     explicit UtilShaders(ProgramManager& program_manager);
     ~UtilShaders();
 
-    void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+    void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
                              std::span<const VideoCommon::SwizzleParameters> swizzles);
 
-    void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+    void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
                              std::span<const VideoCommon::SwizzleParameters> swizzles);
 
-    void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+    void PitchUpload(Image& image, const ImageBufferMap& map,
                      std::span<const VideoCommon::SwizzleParameters> swizzles);
 
     void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5be6dabd9..362278f01 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -12,14 +12,15 @@
 #include "common/cityhash.h"
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
 
 namespace Vulkan {
 
 namespace {
 
-constexpr std::size_t POINT = 0;
-constexpr std::size_t LINE = 1;
-constexpr std::size_t POLYGON = 2;
+constexpr size_t POINT = 0;
+constexpr size_t LINE = 1;
+constexpr size_t POLYGON = 2;
 constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
     POINT,   // Points
     LINE,    // Lines
@@ -40,10 +41,14 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
 
 } // Anonymous namespace
 
-void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
-    const std::array enabled_lut = {regs.polygon_offset_point_enable,
-                                    regs.polygon_offset_line_enable,
-                                    regs.polygon_offset_fill_enable};
+void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
+                                 bool has_extended_dynamic_state) {
+    const Maxwell& regs = maxwell3d.regs;
+    const std::array enabled_lut{
+        regs.polygon_offset_point_enable,
+        regs.polygon_offset_line_enable,
+        regs.polygon_offset_fill_enable,
+    };
     const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
 
     raw1 = 0;
@@ -64,45 +69,53 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
 
     raw2 = 0;
     const auto test_func =
-        regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
+        regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
     alpha_test_func.Assign(PackComparisonOp(test_func));
     early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
 
     alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
     point_size = Common::BitCast<u32>(regs.point_size);
 
-    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        binding_divisors[index] =
-            regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
+    if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
+        maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
+        for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+            const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
+            binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
+        }
     }
-
-    for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
-        const auto& input = regs.vertex_attrib_format[index];
-        auto& attribute = attributes[index];
-        attribute.raw = 0;
-        attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
-        attribute.buffer.Assign(input.buffer);
-        attribute.offset.Assign(input.offset);
-        attribute.type.Assign(static_cast<u32>(input.type.Value()));
-        attribute.size.Assign(static_cast<u32>(input.size.Value()));
-        attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0);
+    if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
+        maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
+        for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+            const auto& input = regs.vertex_attrib_format[index];
+            auto& attribute = attributes[index];
+            attribute.raw = 0;
+            attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
+            attribute.buffer.Assign(input.buffer);
+            attribute.offset.Assign(input.offset);
+            attribute.type.Assign(static_cast<u32>(input.type.Value()));
+            attribute.size.Assign(static_cast<u32>(input.size.Value()));
+        }
     }
-
-    for (std::size_t index = 0; index < std::size(attachments); ++index) {
-        attachments[index].Fill(regs, index);
+    if (maxwell3d.dirty.flags[Dirty::Blending]) {
+        maxwell3d.dirty.flags[Dirty::Blending] = false;
+        for (size_t index = 0; index < attachments.size(); ++index) {
+            attachments[index].Refresh(regs, index);
+        }
+    }
+    if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
+        maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
+        const auto& transform = regs.viewport_transform;
+        std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
+            return static_cast<u16>(viewport.swizzle.raw);
+        });
     }
-
-    const auto& transform = regs.viewport_transform;
-    std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
-                   [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
-
     if (!has_extended_dynamic_state) {
         no_extended_dynamic_state.Assign(1);
-        dynamic_state.Fill(regs);
+        dynamic_state.Refresh(regs);
     }
 }
 
-void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
+void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
     const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
 
     raw = 0;
@@ -141,7 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
     enable.Assign(1);
 }
 
-void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
+void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
     u32 packed_front_face = PackFrontFace(regs.front_face);
     if (regs.screen_y_control.triangle_rast_flip != 0) {
         // Flip front face
@@ -178,9 +191,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
     });
 }
 
-std::size_t FixedPipelineState::Hash() const noexcept {
+size_t FixedPipelineState::Hash() const noexcept {
     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
-    return static_cast<std::size_t>(hash);
+    return static_cast<size_t>(hash);
 }
 
 bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 465a55fdb..a0eb83a68 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -58,7 +58,7 @@ struct FixedPipelineState {
             BitField<30, 1, u32> enable;
         };
 
-        void Fill(const Maxwell& regs, std::size_t index);
+        void Refresh(const Maxwell& regs, size_t index);
 
         constexpr std::array<bool, 4> Mask() const noexcept {
             return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
@@ -96,8 +96,6 @@ struct FixedPipelineState {
         BitField<6, 14, u32> offset;
         BitField<20, 3, u32> type;
         BitField<23, 6, u32> size;
-        // Not really an element of a vertex attribute, but it can be packed here
-        BitField<29, 1, u32> binding_index_enabled;
 
         constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
             return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
@@ -108,7 +106,7 @@ struct FixedPipelineState {
         }
     };
 
-    template <std::size_t Position>
+    template <size_t Position>
     union StencilFace {
         BitField<Position + 0, 3, u32> action_stencil_fail;
         BitField<Position + 3, 3, u32> action_depth_fail;
@@ -152,7 +150,7 @@ struct FixedPipelineState {
         // Vertex stride is a 12 bits value, we have 4 bits to spare per element
         std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
 
-        void Fill(const Maxwell& regs);
+        void Refresh(const Maxwell& regs);
 
         Maxwell::ComparisonOp DepthTestFunc() const noexcept {
             return UnpackComparisonOp(depth_test_func);
@@ -199,9 +197,9 @@ struct FixedPipelineState {
     std::array<u16, Maxwell::NumViewports> viewport_swizzles;
     DynamicState dynamic_state;
 
-    void Fill(const Maxwell& regs, bool has_extended_dynamic_state);
+    void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
 
-    std::size_t Hash() const noexcept;
+    size_t Hash() const noexcept;
 
     bool operator==(const FixedPipelineState& rhs) const noexcept;
 
@@ -209,8 +207,8 @@ struct FixedPipelineState {
         return !operator==(rhs);
     }
 
-    std::size_t Size() const noexcept {
-        const std::size_t total_size = sizeof *this;
+    size_t Size() const noexcept {
+        const size_t total_size = sizeof *this;
         return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
     }
 };
@@ -224,7 +222,7 @@ namespace std {
 
 template <>
 struct hash<Vulkan::FixedPipelineState> {
-    std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
+    size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
         return k.Hash();
     }
 };
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 85121d9fd..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
     return {};
 }
 
-VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
+VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
     switch (index_format) {
     case Maxwell::IndexFormat::UnsignedByte:
-        if (!device.IsExtIndexTypeUint8Supported()) {
-            UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
-            return VK_INDEX_TYPE_UINT16;
-        }
         return VK_INDEX_TYPE_UINT8_EXT;
     case Maxwell::IndexFormat::UnsignedShort:
         return VK_INDEX_TYPE_UINT16;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7c34b47dc..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
 
 VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
 
-VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
+VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
 
 VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
 
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 61796e33a..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
     return separated_extensions;
 }
 
+Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
+                    VkSurfaceKHR surface) {
+    const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
+    const s32 device_index = Settings::values.vulkan_device.GetValue(); // index from settings
+    if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
+        LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    } // past this point device_index is within [0, devices.size())
+    const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
+    return Device(*instance, physical_device, surface, dld);
+}
 } // Anonymous namespace
 
 RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
                                Core::Frontend::EmuWindow& emu_window,
                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
-                               std::unique_ptr<Core::Frontend::GraphicsContext> context_)
-    : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_},
-      cpu_memory{cpu_memory_}, gpu{gpu_} {}
+                               std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
+    : RendererBase(emu_window, std::move(context_)),
+      telemetry_session(telemetry_session_),
+      cpu_memory(cpu_memory_),
+      gpu(gpu_),
+      library(OpenLibrary()),
+      instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
+                              true, Settings::values.renderer_debug)),
+      debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
+      surface(CreateSurface(instance, render_window)),
+      device(CreateDevice(instance, dld, *surface)),
+      memory_allocator(device, false),
+      state_tracker(gpu),
+      scheduler(device, state_tracker),
+      swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
+                render_window.GetFramebufferLayout().height, false),
+      blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
+                  screen_info),
+      rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
+                 memory_allocator, state_tracker, scheduler) {
+    Report(); // logs driver/device details and adds them to the telemetry session
+} catch (const vk::Exception& exception) { // also covers throws from the member initializers
+    LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
+    throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
+}
 
 RendererVulkan::~RendererVulkan() {
-    ShutDown();
+    void(device.GetLogical().WaitIdle()); // WaitIdle's result is deliberately discarded
 }
 
 void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
         const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
         const bool use_accelerated =
-            rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
+            rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
         const bool is_srgb = use_accelerated && screen_info.is_srgb;
-        if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
-            swapchain->Create(layout.width, layout.height, is_srgb);
-            blit_screen->Recreate();
+        if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
+            swapchain.Create(layout.width, layout.height, is_srgb);
+            blit_screen.Recreate();
         }
 
-        scheduler->WaitWorker();
+        scheduler.WaitWorker();
 
-        swapchain->AcquireNextImage();
-        const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
+        swapchain.AcquireNextImage();
+        const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
 
-        scheduler->Flush(render_semaphore);
+        scheduler.Flush(render_semaphore);
 
-        if (swapchain->Present(render_semaphore)) {
-            blit_screen->Recreate();
+        if (swapchain.Present(render_semaphore)) {
+            blit_screen.Recreate();
         }
-
-        rasterizer->TickFrame();
+        rasterizer.TickFrame();
     }
 
     render_window.OnFrameDisplayed();
 }
 
-bool RendererVulkan::Init() try {
-    library = OpenLibrary();
-    instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
-                              true, Settings::values.renderer_debug);
-    if (Settings::values.renderer_debug) {
-        debug_callback = CreateDebugCallback(instance);
-    }
-    surface = CreateSurface(instance, render_window);
-
-    InitializeDevice();
-    Report();
-
-    memory_allocator = std::make_unique<MemoryAllocator>(*device);
-
-    state_tracker = std::make_unique<StateTracker>(gpu);
-
-    scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
-
-    const auto& framebuffer = render_window.GetFramebufferLayout();
-    swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
-    swapchain->Create(framebuffer.width, framebuffer.height, false);
-
-    rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
-                                                    cpu_memory, screen_info, *device,
-                                                    *memory_allocator, *state_tracker, *scheduler);
-
-    blit_screen =
-        std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
-                                       *memory_allocator, *swapchain, *scheduler, screen_info);
-    return true;
-
-} catch (const vk::Exception& exception) {
-    LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
-    return false;
-}
-
-void RendererVulkan::ShutDown() {
-    if (!device) {
-        return;
-    }
-    if (const auto& dev = device->GetLogical()) {
-        dev.WaitIdle();
-    }
-    rasterizer.reset();
-    blit_screen.reset();
-    scheduler.reset();
-    swapchain.reset();
-    memory_allocator.reset();
-    device.reset();
-}
-
-void RendererVulkan::InitializeDevice() {
-    const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
-    const s32 device_index = Settings::values.vulkan_device.GetValue();
-    if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
-        LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
-        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
-    }
-    const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
-    device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
-}
-
 void RendererVulkan::Report() const {
-    const std::string vendor_name{device->GetVendorName()};
-    const std::string model_name{device->GetModelName()};
-    const std::string driver_version = GetDriverVersion(*device);
+    const std::string vendor_name{device.GetVendorName()};
+    const std::string model_name{device.GetModelName()};
+    const std::string driver_version = GetDriverVersion(device);
     const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
 
-    const std::string api_version = GetReadableVersion(device->ApiVersion());
+    const std::string api_version = GetReadableVersion(device.ApiVersion());
 
-    const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
+    const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
 
     LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
     LOG_INFO(Render_Vulkan, "Device: {}", model_name);
@@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
     telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
 }
 
-std::vector<std::string> RendererVulkan::EnumerateDevices() try {
-    vk::InstanceDispatch dld;
-    const Common::DynamicLibrary library = OpenLibrary();
-    const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
-    const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
-    std::vector<std::string> names;
-    names.reserve(physical_devices.size());
-    for (const VkPhysicalDevice device : physical_devices) {
-        names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
-    }
-    return names;
-
-} catch (const vk::Exception& exception) {
-    LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
-    return {};
-}
-
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index daf55b9b4..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,8 +9,14 @@
 #include <vector>
 
 #include "common/dynamic_library.h"
-
 #include "video_core/renderer_base.h"
+#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Core {
@@ -27,20 +33,6 @@ class GPU;
 
 namespace Vulkan {
 
-class Device;
-class StateTracker;
-class MemoryAllocator;
-class VKBlitScreen;
-class VKSwapchain;
-class VKScheduler;
-
-struct VKScreenInfo {
-    VkImageView image_view{};
-    u32 width{};
-    u32 height{};
-    bool is_srgb{};
-};
-
 class RendererVulkan final : public VideoCore::RendererBase {
 public:
     explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -49,15 +41,13 @@ public:
                             std::unique_ptr<Core::Frontend::GraphicsContext> context_);
     ~RendererVulkan() override;
 
-    bool Init() override;
-    void ShutDown() override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
 
-    static std::vector<std::string> EnumerateDevices();
+    VideoCore::RasterizerInterface* ReadRasterizer() override {
+        return &rasterizer; // non-owning pointer to the rasterizer member of this renderer
+    }
 
 private:
-    void InitializeDevice();
-
     void Report() const;
 
     Core::TelemetrySession& telemetry_session;
@@ -68,18 +58,18 @@ private:
     vk::InstanceDispatch dld;
 
     vk::Instance instance;
-
+    vk::DebugUtilsMessenger debug_callback;
     vk::SurfaceKHR surface;
 
     VKScreenInfo screen_info;
 
-    vk::DebugUtilsMessenger debug_callback;
-    std::unique_ptr<Device> device;
-    std::unique_ptr<MemoryAllocator> memory_allocator;
-    std::unique_ptr<StateTracker> state_tracker;
-    std::unique_ptr<VKScheduler> scheduler;
-    std::unique_ptr<VKSwapchain> swapchain;
-    std::unique_ptr<VKBlitScreen> blit_screen;
+    Device device;
+    MemoryAllocator memory_allocator;
+    StateTracker state_tracker;
+    VKScheduler scheduler;
+    VKSwapchain swapchain;
+    VKBlitScreen blit_screen;
+    RasterizerVulkan rasterizer;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 3e3b895e0..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -18,7 +18,6 @@
 #include "video_core/gpu.h"
 #include "video_core/host_shaders/vulkan_present_frag_spv.h"
 #include "video_core/host_shaders/vulkan_present_vert_spv.h"
-#include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
@@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
 };
 
 VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
-                           Core::Frontend::EmuWindow& render_window_,
-                           VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+                           Core::Frontend::EmuWindow& render_window_, const Device& device_,
                            MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
                            VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
-    : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
-      device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_},
-      scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
+    : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
+      memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
+      image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
     resource_ticks.resize(image_count);
 
     CreateStaticResources();
@@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
     SetUniformData(data, framebuffer);
     SetVertexData(data, framebuffer);
 
-    const std::span<u8> map = buffer_commit.Map();
-    std::memcpy(map.data(), &data, sizeof(data));
+    const std::span<u8> mapped_span = buffer_commit.Map();
+    std::memcpy(mapped_span.data(), &data, sizeof(data));
 
     if (!use_accelerated) {
         const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
         const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
         const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
         const size_t size_bytes = GetSizeInBytes(framebuffer);
-        rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
 
         // TODO(Rodrigo): Read this from HLE
         constexpr u32 block_height_log2 = 4;
         const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
         Tegra::Texture::UnswizzleTexture(
-            map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel,
-            framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
+            mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
+            bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
 
         const VkBufferImageCopy copy{
             .bufferOffset = image_offset,
@@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
         cmdbuf.Draw(4, 1, 0, 0);
         cmdbuf.EndRenderPass();
     });
-
     return *semaphores[image_index];
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index b52576957..5e3177685 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -38,12 +38,18 @@ class RasterizerVulkan;
 class VKScheduler;
 class VKSwapchain;
 
-class VKBlitScreen final {
+struct VKScreenInfo {
+    VkImageView image_view{};
+    u32 width{};
+    u32 height{};
+    bool is_srgb{}; // compared against the swapchain's sRGB state in RendererVulkan::SwapBuffers
+};
+
+class VKBlitScreen {
 public:
     explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
-                          Core::Frontend::EmuWindow& render_window,
-                          VideoCore::RasterizerInterface& rasterizer, const Device& device,
-                          MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
+                          Core::Frontend::EmuWindow& render_window, const Device& device,
+                          MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
                           VKScheduler& scheduler, const VKScreenInfo& screen_info);
     ~VKBlitScreen();
 
@@ -84,7 +90,6 @@ private:
 
     Core::Memory::Memory& cpu_memory;
     Core::Frontend::EmuWindow& render_window;
-    VideoCore::RasterizerInterface& rasterizer;
     const Device& device;
     MemoryAllocator& memory_allocator;
     VKSwapchain& swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8ad40a0f..848eedd66 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -3,188 +3,308 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <array>
 #include <cstring>
-#include <memory>
+#include <span>
+#include <vector>
 
-#include "core/core.h"
 #include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
-
 namespace {
+VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
+    VkBufferCopy region{}; // field-for-field translation of the cache's copy descriptor
+    region.srcOffset = copy.src_offset;
+    region.dstOffset = copy.dst_offset;
+    region.size = copy.size;
+    return region;
+}
 
-constexpr VkBufferUsageFlags BUFFER_USAGE =
-    VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
-    VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-
-constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
-    VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
-    VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
-    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-
-constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
-    VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
-    VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
+VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
+    // Pick the smallest index type using the 0xff / 0xffff element-count thresholds.
+    const bool fits_in_u8 = num_elements <= 0xff;
+    const bool fits_in_u16 = num_elements <= 0xffff;
+    if (fits_in_u8 && device.IsExtIndexTypeUint8Supported()) {
+        return VK_INDEX_TYPE_UINT8_EXT; // 8-bit indices additionally need device support
+    }
+    return fits_in_u16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+}
 
-constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
-    VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+size_t BytesPerIndex(VkIndexType index_type) {
+    if (index_type == VK_INDEX_TYPE_UINT8_EXT) {
+        return 1;
+    }
+    if (index_type == VK_INDEX_TYPE_UINT16) {
+        return 2;
+    }
+    if (index_type == VK_INDEX_TYPE_UINT32) {
+        return 4;
+    }
+    UNREACHABLE_MSG("Invalid index type={}", index_type); // no other index type is handled
+    return 1;
+}
 
+template <typename T>
+std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
+    // A quad becomes triangles (0, 1, 2) and (0, 2, 3); vertices of quad N start at first + 4 * N.
+    // Sums are computed in u32, then narrowed to T explicitly rather than inside <algorithm>.
+    const auto at = [base = first + quad * 4](u32 corner) { return static_cast<T>(base + corner); };
+    return {at(0), at(1), at(2), at(0), at(2), at(3)};
+}
 } // Anonymous namespace
 
-Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_,
-               StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
-    : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
-                                                                                 staging_pool_} {
-    buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} // no VkBuffer made
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+               VAddr cpu_addr_, u64 size_bytes_)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+    buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
-        .size = static_cast<VkDeviceSize>(size_),
-        .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+        .size = SizeBytes(),
+        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+                 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+                 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, // every usage listed shares this one buffer
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
     });
-    commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+    if (runtime.device.HasDebuggingToolAttached()) { // label the buffer with its CPU address
+        buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
+    }
+    commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); // device-local
 }
 
-Buffer::~Buffer() = default;
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
+                                       VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
+                                       VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                       VKDescriptorPool& descriptor_pool) // forwarded, not stored
+    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+      staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
+      uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
+      quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
 
-void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
-    const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
-    std::memcpy(staging.mapped_span.data(), data, data_size);
+StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
+    return staging_pool.Request(size, MemoryUsage::Upload); // pool request tagged for uploads
+}
 
-    scheduler.RequestOutsideRenderPassOperationContext();
+StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
+    return staging_pool.Request(size, MemoryUsage::Download); // pool request tagged for downloads
+}
 
-    const VkBuffer handle = Handle();
-    scheduler.Record([staging = staging.buffer, handle, offset, data_size,
-                      &device = device](vk::CommandBuffer cmdbuf) {
-        const VkBufferMemoryBarrier read_barrier{
-            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-            .pNext = nullptr,
-            .srcAccessMask =
-                VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
-                VK_ACCESS_HOST_WRITE_BIT |
-                (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
-            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = handle,
-            .offset = offset,
-            .size = data_size,
-        };
-        const VkBufferMemoryBarrier write_barrier{
-            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-            .pNext = nullptr,
-            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = handle,
-            .offset = offset,
-            .size = data_size,
-        };
+void BufferCacheRuntime::Finish() {
+    scheduler.Finish(); // plain delegation to the scheduler
+}
+
+void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
+                                    std::span<const VideoCommon::BufferCopy> copies) {
+    static constexpr VkMemoryBarrier READ_BARRIER{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, // earlier writes, issued before the copy
+        .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+    };
+    static constexpr VkMemoryBarrier WRITE_BARRIER{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, // the copy's writes, issued after it
+        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+    };
+    // Measuring a popular game, this number never exceeds the specified size once data is warmed up
+    boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+    std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                               0, read_barrier);
-        cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
-                               write_barrier);
+                               0, READ_BARRIER);
+        cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); // bracketed by the two barriers
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               0, WRITE_BARRIER);
     });
 }
 
-void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
-    auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
-    scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
+                                         u32 base_vertex, u32 num_indices, VkBuffer buffer,
+                                         u32 offset, [[maybe_unused]] u32 size) {
+    // Bind the guest index buffer, converting quads and unsupported uint8 indices beforehand.
+    VkIndexType index_type = MaxwellToVK::IndexFormat(index_format);
+    VkBuffer bind_buffer = buffer;
+    VkDeviceSize bind_offset = offset;
+    if (topology == PrimitiveTopology::Quads) {
+        std::tie(bind_buffer, bind_offset) =
+            quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
+        index_type = VK_INDEX_TYPE_UINT32;
+    } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
+        std::tie(bind_buffer, bind_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+        index_type = VK_INDEX_TYPE_UINT16;
+    }
+    if (bind_buffer == VK_NULL_HANDLE) {
+        ReserveNullIndexBuffer(); // Vulkan can't bind a null index buffer; use our own
+        bind_buffer = *null_index_buffer;
+    }
+    scheduler.Record([bind_buffer, bind_offset, index_type](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindIndexBuffer(bind_buffer, bind_offset, index_type);
+    });
+}
 
-    const VkBuffer handle = Handle();
-    scheduler.Record(
-        [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
-            const VkBufferMemoryBarrier barrier{
-                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-                .pNext = nullptr,
-                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
-                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .buffer = handle,
-                .offset = offset,
-                .size = data_size,
-            };
-
-            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
-                                       VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
-                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                                   VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
-            cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
-        });
-    scheduler.Finish();
+void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+    ReserveQuadArrayLUT(first + count, true);
 
-    std::memcpy(data, staging.mapped_span.data(), data_size);
+    // The LUT stores four consecutive index tables, one per value of 'first % 4'.
+    // Select the table for 'first % 4', then skip the 'first / 4' quads that come before it.
+    const VkIndexType index_type = quad_array_lut_index_type;
+    const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
+    const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
+    scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindIndexBuffer(buffer, offset, index_type);
+    });
 }
 
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                      std::size_t copy_size) {
-    scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
+                                          u32 stride) {
+    if (!device.IsExtExtendedDynamicStateSupported()) {
+        scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindVertexBuffer(index, buffer, offset);
+        });
+        return; // Size and stride are only bound on the extended dynamic state path
+    }
+    scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
+        const VkDeviceSize bind_offset{offset};
+        const VkDeviceSize bind_stride{stride};
+        const VkDeviceSize bind_size = buffer == VK_NULL_HANDLE ? VK_WHOLE_SIZE : size;
+        cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &bind_offset, &bind_size, &bind_stride);
+    });
+}
 
-    const VkBuffer dst_buffer = Handle();
-    scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
-                      copy_size](vk::CommandBuffer cmdbuf) {
-        cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
-
-        std::array<VkBufferMemoryBarrier, 2> barriers;
-        barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barriers[0].pNext = nullptr;
-        barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
-        barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barriers[0].buffer = src_buffer;
-        barriers[0].offset = src_offset;
-        barriers[0].size = copy_size;
-        barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barriers[1].pNext = nullptr;
-        barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-        barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
-        barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barriers[1].buffer = dst_buffer;
-        barriers[1].offset = dst_offset;
-        barriers[1].size = copy_size;
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
-                               barriers, {});
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
+                                                     u32 size) {
+    // Skip the bind without transform feedback support; the rasterizer already logged it.
+    if (!device.IsExtTransformFeedbackSupported()) {
+        return;
+    }
+    scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
+        const VkDeviceSize range_size{size};
+        const VkDeviceSize range_offset{offset};
+        cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &range_offset, &range_size);
     });
 }
 
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                             Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                             const Device& device_, MemoryAllocator& memory_allocator_,
-                             VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
-                             StagingBufferPool& staging_pool_)
-    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
-                                                                 cpu_memory_, stream_buffer_},
-      device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
-      staging_pool{staging_pool_} {}
+void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
+    update_descriptor_queue.AddBuffer(buffer, offset, size);
+}
 
-VKBufferCache::~VKBufferCache() = default;
+void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
+    if (num_indices <= current_num_indices) {
+        return;
+    }
+    if (wait_for_idle) {
+        scheduler.Finish();
+    }
+    current_num_indices = num_indices;
+    quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
 
-std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr,
-                                    size);
+    const u32 num_quads = num_indices / 4;
+    const u32 num_triangle_indices = num_quads * 6;
+    const u32 num_first_offset_copies = 4;
+    const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
+    const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
+    quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = size_bytes,
+        .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
+    if (device.HasDebuggingToolAttached()) {
+        quad_array_lut.SetObjectNameEXT("Quad LUT");
+    }
+    quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
+
+    const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
+    u8* staging_data = staging.mapped_span.data();
+    const size_t quad_size = bytes_per_index * 6;
+    for (u32 first = 0; first < num_first_offset_copies; ++first) {
+        for (u32 quad = 0; quad < num_quads; ++quad) {
+            switch (quad_array_lut_index_type) {
+            case VK_INDEX_TYPE_UINT8_EXT:
+                std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
+                break;
+            case VK_INDEX_TYPE_UINT16:
+                std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
+                break;
+            case VK_INDEX_TYPE_UINT32:
+                std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
+                break;
+            default:
+                UNREACHABLE();
+                break;
+            }
+            staging_data += quad_size;
+        }
+    }
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+                      dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
+        const VkBufferCopy copy{
+            .srcOffset = src_offset,
+            .dstOffset = 0,
+            .size = size_bytes,
+        };
+        const VkBufferMemoryBarrier write_barrier{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = dst_buffer,
+            .offset = 0,
+            .size = size_bytes,
+        };
+        cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+                               0, write_barrier);
+    });
 }
 
-VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
-    size = std::max(size, std::size_t(4));
-    const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
+void BufferCacheRuntime::ReserveNullIndexBuffer() {
+    if (null_index_buffer) {
+        return;
+    }
+    null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = 4,
+        .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
+    if (device.HasDebuggingToolAttached()) {
+        null_index_buffer.SetObjectNameEXT("Null index buffer");
+    }
+    null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
+
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
-        cmdbuf.FillBuffer(buffer, 0, size, 0);
+    scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
+        cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
     });
-    return {empty.buffer, 0, 0};
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 41d577510..041e6515c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -4,69 +4,124 @@
 
 #pragma once
 
-#include <memory>
-
-#include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 class Device;
+class VKDescriptorPool;
 class VKScheduler;
+class VKUpdateDescriptorQueue;
 
-class Buffer final : public VideoCommon::BufferBlock {
-public:
-    explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
-                    StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
-    ~Buffer();
-
-    void Upload(std::size_t offset, std::size_t data_size, const u8* data);
+class BufferCacheRuntime;
 
-    void Download(std::size_t offset, std::size_t data_size, u8* data);
-
-    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                  std::size_t copy_size);
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
+public:
+    explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
+    explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+                    VAddr cpu_addr_, u64 size_bytes_);
 
-    VkBuffer Handle() const {
+    [[nodiscard]] VkBuffer Handle() const noexcept {
         return *buffer;
     }
 
-    u64 Address() const {
-        return 0;
+    operator VkBuffer() const noexcept {
+        return *buffer;
     }
 
 private:
-    const Device& device;
-    VKScheduler& scheduler;
-    StagingBufferPool& staging_pool;
-
     vk::Buffer buffer;
     MemoryCommit commit;
 };
 
-class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
+class BufferCacheRuntime {
+    friend Buffer;
+
+    using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
+    using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
+
 public:
-    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
-                           Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
-                           const Device& device, MemoryAllocator& memory_allocator,
-                           VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
-                           StagingBufferPool& staging_pool);
-    ~VKBufferCache();
+    explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
+                                VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
+                                VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                VKDescriptorPool& descriptor_pool);
+
+    void Finish();
+
+    [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
+
+    [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
+
+    void CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
+                    std::span<const VideoCommon::BufferCopy> copies); // Copies src into dst
+
+    void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 base_vertex,
+                         u32 num_indices, VkBuffer buffer, u32 offset, u32 size);
 
-    BufferInfo GetEmptyBuffer(std::size_t size) override;
+    void BindQuadArrayIndexBuffer(u32 first, u32 count);
 
-protected:
-    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+    void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
+
+    void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
+
+    std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
+                                          [[maybe_unused]] u32 binding_index, u32 size) {
+        const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
+        BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
+        return ref.mapped_span;
+    }
+
+    void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
+        BindBuffer(buffer, offset, size);
+    }
+
+    void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
+                           [[maybe_unused]] bool is_written) {
+        BindBuffer(buffer, offset, size);
+    }
 
 private:
+    void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
+
+    void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
+
+    void ReserveNullIndexBuffer();
+
     const Device& device;
     MemoryAllocator& memory_allocator;
     VKScheduler& scheduler;
     StagingBufferPool& staging_pool;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+
+    vk::Buffer quad_array_lut;
+    MemoryCommit quad_array_lut_commit;
+    VkIndexType quad_array_lut_index_type{};
+    u32 current_num_indices = 0;
+
+    vk::Buffer null_index_buffer;
+    MemoryCommit null_index_buffer_commit;
+
+    Uint8Pass uint8_pass;
+    QuadIndexedPass quad_index_pass;
 };
 
+struct BufferCacheParams {
+    using Runtime = Vulkan::BufferCacheRuntime;
+    using Buffer = Vulkan::Buffer;
+
+    static constexpr bool IS_OPENGL = false;
+    static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
+    static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
+    static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
+    static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
+    static constexpr bool USE_MEMORY_MAPS = true;
+};
+
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 5eb6a54be..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,7 +10,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
+#include "common/div_ceil.h"
 #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
 #include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -22,30 +22,7 @@
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
-
 namespace {
-
-VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
-    return {
-        .binding = 0,
-        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-        .descriptorCount = 1,
-        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-        .pImmutableSamplers = nullptr,
-    };
-}
-
-VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
-    return {
-        .dstBinding = 0,
-        .dstArrayElement = 0,
-        .descriptorCount = 1,
-        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-        .offset = 0,
-        .stride = sizeof(DescriptorUpdateEntry),
-    };
-}
-
 VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
     return {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
     return set;
 }
 
-QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
-                             VKDescriptorPool& descriptor_pool_,
-                             StagingBufferPool& staging_buffer_pool_,
-                             VKUpdateDescriptorQueue& update_descriptor_queue_)
-    : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
-                    BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
-                    BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
-      scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
-      update_descriptor_queue{update_descriptor_queue_} {}
-
-QuadArrayPass::~QuadArrayPass() = default;
-
-std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
-    const u32 num_triangle_vertices = (num_vertices / 4) * 6;
-    const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
-    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
-
-    update_descriptor_queue.Acquire();
-    update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
-    const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
-
-    scheduler.RequestOutsideRenderPassOperationContext();
-
-    ASSERT(num_vertices % 4 == 0);
-    const u32 num_quads = num_vertices / 4;
-    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
-                      num_quads, first, set](vk::CommandBuffer cmdbuf) {
-        constexpr u32 dispatch_size = 1024;
-        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
-        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
-        cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
-
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
-        barrier.offset = 0;
-        barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
-    });
-    return {staging_ref.buffer, 0};
-}
-
 Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
                      VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
                      VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
 
 Uint8Pass::~Uint8Pass() = default;
 
-std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
-                                             u64 src_offset) {
+std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
+                                                      u32 src_offset) {
     const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
-    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
+    const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
-    update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
+    scheduler.Record([layout = *layout, pipeline = *pipeline, set,
                       num_vertices](vk::CommandBuffer cmdbuf) {
-        constexpr u32 dispatch_size = 1024;
+        static constexpr u32 DISPATCH_SIZE = 1024;
+        static constexpr VkMemoryBarrier WRITE_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, // Output is consumed as an index buffer
+        };
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
-        cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
-
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
-        barrier.offset = 0;
-        barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
+        cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
     });
-    return {staging_ref.buffer, 0};
+    return {staging.buffer, staging.offset};
 }
 
 QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
 
 QuadIndexedPass::~QuadIndexedPass() = default;
 
-std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
+std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
-    VkBuffer src_buffer, u64 src_offset) {
+    VkBuffer src_buffer, u32 src_offset) {
     const u32 index_shift = [index_format] {
         switch (index_format) {
         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
     const u32 num_tri_vertices = (num_vertices / 4) * 6;
 
     const std::size_t staging_size = num_tri_vertices * sizeof(u32);
-    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
+    const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
 
     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
-    update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
+    scheduler.Record([layout = *layout, pipeline = *pipeline, set,
                       num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
-        static constexpr u32 dispatch_size = 1024;
+        static constexpr u32 DISPATCH_SIZE = 1024;
+        static constexpr VkMemoryBarrier WRITE_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, // Output is consumed as an index buffer
+        };
         const std::array push_constants = {base_vertex, index_shift};
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
         cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
                              &push_constants);
-        cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
-
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
-        barrier.offset = 0;
-        barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
+        cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
     });
-    return {staging_ref.buffer, 0};
+    return {staging.buffer, staging.offset};
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index f5c6f5f17..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -41,22 +41,6 @@ private:
     vk::ShaderModule module;
 };
 
-class QuadArrayPass final : public VKComputePass {
-public:
-    explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
-                           VKDescriptorPool& descriptor_pool_,
-                           StagingBufferPool& staging_buffer_pool_,
-                           VKUpdateDescriptorQueue& update_descriptor_queue_);
-    ~QuadArrayPass();
-
-    std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
-
-private:
-    VKScheduler& scheduler;
-    StagingBufferPool& staging_buffer_pool;
-    VKUpdateDescriptorQueue& update_descriptor_queue;
-};
-
 class Uint8Pass final : public VKComputePass {
 public:
     explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@@ -64,7 +48,10 @@ public:
                        VKUpdateDescriptorQueue& update_descriptor_queue_);
     ~Uint8Pass();
 
-    std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
+    /// Assemble uint8 indices into an uint16 index buffer
+    /// Returns a pair with the staging buffer, and the offset where the assembled data is
+    std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
+                                               u32 src_offset);
 
 private:
     VKScheduler& scheduler;
@@ -80,9 +67,9 @@ public:
                              VKUpdateDescriptorQueue& update_descriptor_queue_);
     ~QuadIndexedPass();
 
-    std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
-                                      u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
-                                      u64 src_offset);
+    std::pair<VkBuffer, VkDeviceSize> Assemble(
+        Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
+        u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
 
 private:
     VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 6cd00884d..3bec48d14 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -45,8 +45,8 @@ void InnerFence::Wait() {
 }
 
 VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
-                               Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
-                               VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
+                               TextureCache& texture_cache_, BufferCache& buffer_cache_,
+                               VKQueryCache& query_cache_, const Device& device_,
                                VKScheduler& scheduler_)
     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
       scheduler{scheduler_} {}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 9c5e5aa8f..2f8322d29 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -22,7 +22,6 @@ class RasterizerInterface;
 namespace Vulkan {
 
 class Device;
-class VKBufferCache;
 class VKQueryCache;
 class VKScheduler;
 
@@ -45,14 +44,14 @@ private:
 using Fence = std::shared_ptr<InnerFence>;
 
 using GenericFenceManager =
-    VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
+    VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
 
 class VKFenceManager final : public GenericFenceManager {
 public:
-    explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
-                            Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
-                            VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
-                            VKScheduler& scheduler_);
+    explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+                            TextureCache& texture_cache, BufferCache& buffer_cache,
+                            VKQueryCache& query_cache, const Device& device,
+                            VKScheduler& scheduler);
 
 protected:
     Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index d50dca604..fc6dd83eb 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -221,9 +221,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
     std::vector<VkVertexInputBindingDescription> vertex_bindings;
     std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
     for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (state.attributes[index].binding_index_enabled == 0) {
-            continue;
-        }
         const bool instanced = state.binding_divisors[index] != 0;
         const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
         vertex_bindings.push_back({
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index f336f1862..2c7ed654d 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -21,7 +21,12 @@ public:
 
     /// Returns the current logical tick.
     [[nodiscard]] u64 CurrentTick() const noexcept {
-        return current_tick;
+        return current_tick.load(std::memory_order_relaxed);
+    }
+
+    /// Returns the last known GPU tick, as last stored in gpu_tick (see Refresh).
+    [[nodiscard]] u64 KnownGpuTick() const noexcept {
+        return gpu_tick.load(std::memory_order_relaxed);
     }
 
     /// Returns the timeline semaphore handle.
@@ -31,7 +36,7 @@ public:
 
     /// Returns true when a tick has been hit by the GPU.
     [[nodiscard]] bool IsFree(u64 tick) {
-        return gpu_tick >= tick;
+        return gpu_tick.load(std::memory_order_relaxed) >= tick;
     }
 
     /// Advance to the logical tick.
@@ -41,7 +46,7 @@ public:
 
     /// Refresh the known GPU tick
     void Refresh() {
-        gpu_tick = semaphore.GetCounter();
+        gpu_tick.store(semaphore.GetCounter(), std::memory_order_relaxed);
     }
 
     /// Waits for a tick to be hit on the GPU
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f0a111829..dfd38f575 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -8,8 +8,6 @@
 #include <mutex>
 #include <vector>
 
-#include <boost/container/static_vector.hpp>
-
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -24,7 +22,6 @@
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25
 MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
 MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
 MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
 MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
 
 namespace {
+struct DrawParams { // Arguments of the draw call recorded by RasterizerVulkan::Draw
+    u32 base_instance; ///< Taken from regs.vb_base_instance
+    u32 num_instances; ///< 1 unless the draw is instanced
+    u32 base_vertex; ///< regs.vb_element_base (indexed) or regs.vertex_buffer.first (arrays)
+    u32 num_vertices; ///< regs.index_array.count (indexed) or regs.vertex_buffer.count (arrays)
+    bool is_indexed; ///< Selects cmdbuf.DrawIndexed over cmdbuf.Draw when recording
+};
 
 constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
 
@@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
     const float width = src.scale_x * 2.0f;
     const float height = src.scale_y * 2.0f;
     const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
-
     VkViewport viewport{
         .x = src.translate_x - src.scale_x,
         .y = src.translate_y - src.scale_y,
@@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
         .minDepth = src.translate_z - src.scale_z * reduce_z,
         .maxDepth = src.translate_z + src.scale_z,
     };
-
     if (!device.IsExtDepthRangeUnrestrictedSupported()) {
         viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
         viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
     }
-
     return viewport;
 }
 
@@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
     return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
 }
 
-template <size_t N>
-std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
-    std::array<VkDeviceSize, N> expanded;
-    std::copy(strides.begin(), strides.end(), expanded.begin());
-    return expanded;
-}
-
 ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
     if (entry.is_buffer) {
         return ImageViewType::e2D;
@@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca
     }
 }
 
-} // Anonymous namespace
-
-class BufferBindings final {
-public:
-    void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {
-        vertex.buffers[vertex.num_buffers] = buffer;
-        vertex.offsets[vertex.num_buffers] = offset;
-        vertex.sizes[vertex.num_buffers] = size;
-        vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);
-        ++vertex.num_buffers;
-    }
-
-    void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
-        index.buffer = buffer;
-        index.offset = offset;
-        index.type = type;
-    }
-
-    void Bind(const Device& device, VKScheduler& scheduler) const {
-        // Use this large switch case to avoid dispatching more memory in the record lambda than
-        // what we need. It looks horrible, but it's the best we can do on standard C++.
-        switch (vertex.num_buffers) {
-        case 0:
-            return BindStatic<0>(device, scheduler);
-        case 1:
-            return BindStatic<1>(device, scheduler);
-        case 2:
-            return BindStatic<2>(device, scheduler);
-        case 3:
-            return BindStatic<3>(device, scheduler);
-        case 4:
-            return BindStatic<4>(device, scheduler);
-        case 5:
-            return BindStatic<5>(device, scheduler);
-        case 6:
-            return BindStatic<6>(device, scheduler);
-        case 7:
-            return BindStatic<7>(device, scheduler);
-        case 8:
-            return BindStatic<8>(device, scheduler);
-        case 9:
-            return BindStatic<9>(device, scheduler);
-        case 10:
-            return BindStatic<10>(device, scheduler);
-        case 11:
-            return BindStatic<11>(device, scheduler);
-        case 12:
-            return BindStatic<12>(device, scheduler);
-        case 13:
-            return BindStatic<13>(device, scheduler);
-        case 14:
-            return BindStatic<14>(device, scheduler);
-        case 15:
-            return BindStatic<15>(device, scheduler);
-        case 16:
-            return BindStatic<16>(device, scheduler);
-        case 17:
-            return BindStatic<17>(device, scheduler);
-        case 18:
-            return BindStatic<18>(device, scheduler);
-        case 19:
-            return BindStatic<19>(device, scheduler);
-        case 20:
-            return BindStatic<20>(device, scheduler);
-        case 21:
-            return BindStatic<21>(device, scheduler);
-        case 22:
-            return BindStatic<22>(device, scheduler);
-        case 23:
-            return BindStatic<23>(device, scheduler);
-        case 24:
-            return BindStatic<24>(device, scheduler);
-        case 25:
-            return BindStatic<25>(device, scheduler);
-        case 26:
-            return BindStatic<26>(device, scheduler);
-        case 27:
-            return BindStatic<27>(device, scheduler);
-        case 28:
-            return BindStatic<28>(device, scheduler);
-        case 29:
-            return BindStatic<29>(device, scheduler);
-        case 30:
-            return BindStatic<30>(device, scheduler);
-        case 31:
-            return BindStatic<31>(device, scheduler);
-        case 32:
-            return BindStatic<32>(device, scheduler);
-        }
-        UNREACHABLE();
-    }
-
-private:
-    // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
-    struct {
-        size_t num_buffers = 0;
-        std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
-        std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
-        std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
-        std::array<u16, Maxwell::NumVertexArrays> strides;
-    } vertex;
-
-    struct {
-        VkBuffer buffer = nullptr;
-        VkDeviceSize offset;
-        VkIndexType type;
-    } index;
-
-    template <size_t N>
-    void BindStatic(const Device& device, VKScheduler& scheduler) const {
-        if (device.IsExtExtendedDynamicStateSupported()) {
-            if (index.buffer) {
-                BindStatic<N, true, true>(scheduler);
-            } else {
-                BindStatic<N, false, true>(scheduler);
-            }
-        } else {
-            if (index.buffer) {
-                BindStatic<N, true, false>(scheduler);
-            } else {
-                BindStatic<N, false, false>(scheduler);
-            }
-        }
-    }
-
-    template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
-    void BindStatic(VKScheduler& scheduler) const {
-        static_assert(N <= Maxwell::NumVertexArrays);
-        if constexpr (N == 0) {
-            return;
-        }
-
-        std::array<VkBuffer, N> buffers;
-        std::array<VkDeviceSize, N> offsets;
-        std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
-        std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
-
-        if constexpr (has_extended_dynamic_state) {
-            // With extended dynamic states we can specify the length and stride of a vertex buffer
-            std::array<VkDeviceSize, N> sizes;
-            std::array<u16, N> strides;
-            std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
-            std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
-
-            if constexpr (is_indexed) {
-                scheduler.Record(
-                    [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
-                        cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
-                        cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
-                                                     offsets.data(), sizes.data(),
-                                                     ExpandStrides(strides).data());
-                    });
-            } else {
-                scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
-                    cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
-                                                 offsets.data(), sizes.data(),
-                                                 ExpandStrides(strides).data());
-                });
-            }
-            return;
-        }
-
-        if constexpr (is_indexed) {
-            // Indexed draw
-            scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
-                cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
-                cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
-            });
-        } else {
-            // Array draw
-            scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
-                cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
-            });
-        }
-    }
-};
-
-void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
-    if (is_indexed) {
-        cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
-    } else {
-        cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
+DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
+                          bool is_indexed) { // Derives the draw call arguments from regs
+    DrawParams params{
+        .base_instance = regs.vb_base_instance,
+        .num_instances = is_instanced ? num_instances : 1, // non-instanced draws use 1 instance
+        .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
+        .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
+        .is_indexed = is_indexed,
+    };
+    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+        // Each quad is expanded to 6 triangle vertices; the base vertex is already part of the
+        // index, so it is reset to 0. See BindQuadArrayIndexBuffer for more details
+        params.num_vertices = (params.num_vertices / 4) * 6; // partial quads are dropped
+        params.base_vertex = 0;
+        params.is_indexed = true; // quads always take the indexed draw path
     }
+    return params;
 }
+} // Anonymous namespace
 
 RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                    Tegra::MemoryManager& gpu_memory_,
@@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
     : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
       gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
       screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
-      state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
+      state_tracker{state_tracker_}, scheduler{scheduler_},
       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
       update_descriptor_queue(device, scheduler),
       blit_image(device, scheduler, state_tracker, descriptor_pool),
-      quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
-      quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
-      uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
       texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+      buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
+                           update_descriptor_queue, descriptor_pool),
+      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
       pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
                      descriptor_pool, update_descriptor_queue),
-      buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
-                   stream_buffer, staging_pool),
       query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
-      fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler),
+      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
       wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
@@ -446,52 +267,51 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     query_cache.UpdateCounters();
 
-    GraphicsPipelineCacheKey key;
-    key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
-
-    buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
+    graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
 
-    BufferBindings buffer_bindings;
-    const DrawParameters draw_params =
-        SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
 
-    auto lock = texture_cache.AcquireLock();
     texture_cache.SynchronizeGraphicsDescriptors();
-
     texture_cache.UpdateRenderTargets(false);
 
     const auto shaders = pipeline_cache.GetShaders();
-    key.shaders = GetShaderAddresses(shaders);
-    SetupShaderDescriptors(shaders);
+    graphics_key.shaders = GetShaderAddresses(shaders);
 
-    buffer_cache.Unmap();
+    SetupShaderDescriptors(shaders, is_indexed);
 
     const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
-    key.renderpass = framebuffer->RenderPass();
+    graphics_key.renderpass = framebuffer->RenderPass();
 
-    auto* const pipeline =
-        pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders);
+    VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
+        graphics_key, framebuffer->NumColorBuffers(), async_shaders);
     if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
         // Async graphics pipeline was not ready.
         return;
     }
 
-    buffer_bindings.Bind(device, scheduler);
-
     BeginTransformFeedback();
 
     scheduler.RequestRenderpass(framebuffer);
     scheduler.BindGraphicsPipeline(pipeline->GetHandle());
     UpdateDynamicStates();
 
-    const auto pipeline_layout = pipeline->GetLayout();
-    const auto descriptor_set = pipeline->CommitDescriptorSet();
+    const auto& regs = maxwell3d.regs;
+    const u32 num_instances = maxwell3d.mme_draw.instance_count;
+    const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
+    const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
+    const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
         if (descriptor_set) {
             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
-                                      DESCRIPTOR_SET, descriptor_set, {});
+                                      DESCRIPTOR_SET, descriptor_set, nullptr);
+        }
+        if (draw_params.is_indexed) {
+            cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
+                               draw_params.base_vertex, draw_params.base_instance);
+        } else {
+            cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
+                        draw_params.base_vertex, draw_params.base_instance);
         }
-        draw_params.Draw(cmdbuf);
     });
 
     EndTransformFeedback();
@@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() {
         return;
     }
 
-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{texture_cache.mutex};
     texture_cache.UpdateRenderTargets(true);
     const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
     const VkExtent2D render_area = framebuffer->RenderArea();
@@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() {
     if (use_stencil) {
         aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
     }
-
     scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
                       clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
         VkClearAttachment attachment;
@@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
     auto& pipeline = pipeline_cache.GetComputePipeline({
         .shader = code_addr,
         .shared_memory_size = launch_desc.shared_alloc,
-        .workgroup_size =
-            {
-                launch_desc.block_dim_x,
-                launch_desc.block_dim_y,
-                launch_desc.block_dim_z,
-            },
+        .workgroup_size{
+            launch_desc.block_dim_x,
+            launch_desc.block_dim_y,
+            launch_desc.block_dim_z,
+        },
     });
 
     // Compute dispatches can't be executed inside a renderpass
@@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
     image_view_indices.clear();
     sampler_handles.clear();
 
-    auto lock = texture_cache.AcquireLock();
-    texture_cache.SynchronizeComputeDescriptors();
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
 
     const auto& entries = pipeline.GetEntries();
+    buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+    buffer_cache.UnbindComputeStorageBuffers();
+    u32 ssbo_index = 0;
+    for (const auto& buffer : entries.global_buffers) {
+        buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+                                              buffer.is_written);
+        ++ssbo_index;
+    }
+    buffer_cache.UpdateComputeBuffers();
+
+    texture_cache.SynchronizeComputeDescriptors();
+
     SetupComputeUniformTexels(entries);
     SetupComputeTextures(entries);
     SetupComputeStorageTexels(entries);
@@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
     const std::span indices_span(image_view_indices.data(), image_view_indices.size());
     texture_cache.FillComputeImageViews(indices_span, image_view_ids);
 
-    buffer_cache.Map(CalculateComputeStreamBufferSize());
-
     update_descriptor_queue.Acquire();
 
-    SetupComputeConstBuffers(entries);
-    SetupComputeGlobalBuffers(entries);
+    buffer_cache.BindHostComputeBuffers();
 
     ImageViewId* image_view_id_ptr = image_view_ids.data();
     VkSampler* sampler_ptr = sampler_handles.data();
     PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
                          sampler_ptr);
 
-    buffer_cache.Unmap();
-
     const VkPipeline pipeline_handle = pipeline.GetHandle();
     const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
     const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
@@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
     query_cache.Query(gpu_addr, type, timestamp);
 }
 
+void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+                                                 u32 size) { // NOTE(review): no lock taken here
+    buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); // forwards unchanged
+}
+
 void RasterizerVulkan::FlushAll() {}
 
 void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
@@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
         return;
     }
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.DownloadMemory(addr, size);
     }
-    buffer_cache.FlushRegion(addr, size);
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.DownloadMemory(addr, size);
+    }
     query_cache.FlushRegion(addr, size);
 }
 
 bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
+    std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
     if (!Settings::IsGPULevelHigh()) {
-        return buffer_cache.MustFlushRegion(addr, size);
+        return buffer_cache.IsRegionGpuModified(addr, size);
     }
     return texture_cache.IsRegionGpuModified(addr, size) ||
-           buffer_cache.MustFlushRegion(addr, size);
+           buffer_cache.IsRegionGpuModified(addr, size);
 }
 
 void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
@@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
         return;
     }
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
     }
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.WriteMemory(addr, size);
+    }
     pipeline_cache.InvalidateRegion(addr, size);
-    buffer_cache.InvalidateRegion(addr, size);
     query_cache.InvalidateRegion(addr, size);
 }
 
@@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }
+    pipeline_cache.OnCPUWrite(addr, size);
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
     }
-    pipeline_cache.OnCPUWrite(addr, size);
-    buffer_cache.OnCPUWrite(addr, size);
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.CachedWriteMemory(addr, size);
+    }
 }
 
 void RasterizerVulkan::SyncGuestHost() {
-    buffer_cache.SyncGuestHost();
     pipeline_cache.SyncGuestHost();
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.FlushCachedWrites();
+    }
 }
 
 void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.UnmapMemory(addr, size);
     }
-    buffer_cache.OnCPUWrite(addr, size);
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.WriteMemory(addr, size);
+    }
     pipeline_cache.OnCPUWrite(addr, size);
 }
 
@@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() {
     draw_counter = 0;
     update_descriptor_queue.TickFrame();
     fence_manager.TickFrame();
-    buffer_cache.TickFrame();
     staging_pool.TickFrame();
     {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
         texture_cache.TickFrame();
     }
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.TickFrame();
+    }
 }
 
 bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
                                              const Tegra::Engines::Fermi2D::Surface& dst,
                                              const Tegra::Engines::Fermi2D::Config& copy_config) {
-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{texture_cache.mutex};
     texture_cache.BlitImage(dst, src, copy_config);
     return true;
 }
@@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     if (!framebuffer_addr) {
         return false;
     }
-
-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{texture_cache.mutex};
     ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
     if (!image_view) {
         return false;
     }
-
     screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
     screen_info.width = image_view->size.width;
     screen_info.height = image_view->size.height;
@@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() {
     draw_counter = 0;
 }
 
-RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
-                                                                 BufferBindings& buffer_bindings,
-                                                                 bool is_indexed,
-                                                                 bool is_instanced) {
-    MICROPROFILE_SCOPE(Vulkan_Geometry);
-
-    const auto& regs = maxwell3d.regs;
-
-    SetupVertexArrays(buffer_bindings);
-
-    const u32 base_instance = regs.vb_base_instance;
-    const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
-    const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
-    const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
-
-    DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
-    SetupIndexBuffer(buffer_bindings, params, is_indexed);
-
-    return params;
-}
-
 void RasterizerVulkan::SetupShaderDescriptors(
-    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
+    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
     image_view_indices.clear();
     sampler_handles.clear();
     for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
@@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors(
         if (!shader) {
             continue;
         }
-        const auto& entries = shader->GetEntries();
+        const ShaderEntries& entries = shader->GetEntries();
         SetupGraphicsUniformTexels(entries, stage);
         SetupGraphicsTextures(entries, stage);
         SetupGraphicsStorageTexels(entries, stage);
         SetupGraphicsImages(entries, stage);
+
+        buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
+        buffer_cache.UnbindGraphicsStorageBuffers(stage);
+        u32 ssbo_index = 0;
+        for (const auto& buffer : entries.global_buffers) {
+            buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+                                                   buffer.cbuf_offset, buffer.is_written);
+            ++ssbo_index;
+        }
     }
     const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+    buffer_cache.UpdateGraphicsBuffers(is_indexed);
     texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
 
+    buffer_cache.BindHostGeometryBuffers(is_indexed);
+
     update_descriptor_queue.Acquire();
 
     ImageViewId* image_view_id_ptr = image_view_ids.data();
@@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
         if (!shader) {
             continue;
         }
-        const auto& entries = shader->GetEntries();
-        SetupGraphicsConstBuffers(entries, stage);
-        SetupGraphicsGlobalBuffers(entries, stage);
-        PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
-                             sampler_ptr);
+        buffer_cache.BindHostStageBuffers(stage);
+        PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
+                             image_view_id_ptr, sampler_ptr);
     }
 }
 
@@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() {
         LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
         return;
     }
-
     UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
                      regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
                      regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
-    UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
-    UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
-    UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
-
-    const auto& binding = regs.tfb_bindings[0];
-    UNIMPLEMENTED_IF(binding.buffer_enable == 0);
-    UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
-    const GPUVAddr gpu_addr = binding.Address();
-    const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
-    const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
-
-    scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
-        cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
-        cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
-    });
+    scheduler.Record(
+        [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
 }
 
 void RasterizerVulkan::EndTransformFeedback() {
@@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() {
     if (!device.IsExtTransformFeedbackSupported()) {
         return;
     }
-
     scheduler.Record(
         [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
 }
 
-void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
-    const auto& regs = maxwell3d.regs;
-
-    for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        const auto& vertex_array = regs.vertex_array[index];
-        if (!vertex_array.IsEnabled()) {
-            continue;
-        }
-        const GPUVAddr start{vertex_array.StartAddress()};
-        const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
-
-        ASSERT(end >= start);
-        const size_t size = end - start;
-        if (size == 0) {
-            buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
-            continue;
-        }
-        const auto info = buffer_cache.UploadMemory(start, size);
-        buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
-    }
-}
-
-void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
-                                        bool is_indexed) {
-    if (params.num_vertices == 0) {
-        return;
-    }
-    const auto& regs = maxwell3d.regs;
-    switch (regs.draw.topology) {
-    case Maxwell::PrimitiveTopology::Quads: {
-        if (!params.is_indexed) {
-            const auto [buffer, offset] =
-                quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
-            buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
-            params.base_vertex = 0;
-            params.num_vertices = params.num_vertices * 6 / 4;
-            params.is_indexed = true;
-            break;
-        }
-        const GPUVAddr gpu_addr = regs.index_array.IndexStart();
-        const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
-        VkBuffer buffer = info.handle;
-        u64 offset = info.offset;
-        std::tie(buffer, offset) = quad_indexed_pass.Assemble(
-            regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
-
-        buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
-        params.num_vertices = (params.num_vertices / 4) * 6;
-        params.base_vertex = 0;
-        break;
-    }
-    default: {
-        if (!is_indexed) {
-            break;
-        }
-        const GPUVAddr gpu_addr = regs.index_array.IndexStart();
-        const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
-        VkBuffer buffer = info.handle;
-        u64 offset = info.offset;
-
-        auto format = regs.index_array.format;
-        const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
-        if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
-            std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
-            format = Maxwell::IndexFormat::UnsignedShort;
-        }
-
-        buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
-        break;
-    }
-    }
-}
-
-void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
-    MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
-    const auto& shader_stage = maxwell3d.state.shader_stages[stage];
-    for (const auto& entry : entries.const_buffers) {
-        SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
-    }
-}
-
-void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
-    MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
-    const auto& cbufs{maxwell3d.state.shader_stages[stage]};
-
-    for (const auto& entry : entries.global_buffers) {
-        const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
-        SetupGlobalBuffer(entry, addr);
-    }
-}
-
 void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
-    MICROPROFILE_SCOPE(Vulkan_Textures);
     const auto& regs = maxwell3d.regs;
     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
     for (const auto& entry : entries.uniform_texels) {
@@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries,
 }
 
 void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
-    MICROPROFILE_SCOPE(Vulkan_Textures);
     const auto& regs = maxwell3d.regs;
     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
     for (const auto& entry : entries.samplers) {
@@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_
 }
 
 void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
-    MICROPROFILE_SCOPE(Vulkan_Textures);
     const auto& regs = maxwell3d.regs;
     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
     for (const auto& entry : entries.storage_texels) {
@@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries,
 }
 
 void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
-    MICROPROFILE_SCOPE(Vulkan_Images);
     const auto& regs = maxwell3d.regs;
     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
     for (const auto& entry : entries.images) {
@@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t
     }
 }
 
-void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
-    MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
-    const auto& launch_desc = kepler_compute.launch_description;
-    for (const auto& entry : entries.const_buffers) {
-        const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
-        const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
-        const Tegra::Engines::ConstBufferInfo info{
-            .address = config.Address(),
-            .size = config.size,
-            .enabled = mask[entry.GetIndex()],
-        };
-        SetupConstBuffer(entry, info);
-    }
-}
-
-void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
-    MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
-    const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
-    for (const auto& entry : entries.global_buffers) {
-        const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
-        SetupGlobalBuffer(entry, addr);
-    }
-}
-
 void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
-    MICROPROFILE_SCOPE(Vulkan_Textures);
     const bool via_header_index = kepler_compute.launch_description.linked_tsc;
     for (const auto& entry : entries.uniform_texels) {
         const TextureHandle handle =
@@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
 }
 
 void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
-    MICROPROFILE_SCOPE(Vulkan_Textures);
     const bool via_header_index = kepler_compute.launch_description.linked_tsc;
     for (const auto& entry : entries.samplers) {
         for (size_t index = 0; index < entry.size; ++index) {
@@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
 }
 
 void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
-    MICROPROFILE_SCOPE(Vulkan_Textures);
     const bool via_header_index = kepler_compute.launch_description.linked_tsc;
     for (const auto& entry : entries.storage_texels) {
         const TextureHandle handle =
@@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
 }
 
 void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
-    MICROPROFILE_SCOPE(Vulkan_Images);
     const bool via_header_index = kepler_compute.launch_description.linked_tsc;
     for (const auto& entry : entries.images) {
         const TextureHandle handle =
@@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
     }
 }
 
-void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
-                                        const Tegra::Engines::ConstBufferInfo& buffer) {
-    if (!buffer.enabled) {
-        // Set values to zero to unbind buffers
-        update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
-        return;
-    }
-    // Align the size to avoid bad std140 interactions
-    const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
-    ASSERT(size <= MaxConstbufferSize);
-
-    const u64 alignment = device.GetUniformBufferAlignment();
-    const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
-    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
-}
-
-void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
-    const u64 actual_addr = gpu_memory.Read<u64>(address);
-    const u32 size = gpu_memory.Read<u32>(address + 8);
-
-    if (size == 0) {
-        // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
-        // because Vulkan doesn't like empty buffers.
-        // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
-        // default buffer.
-        static constexpr size_t dummy_size = 4;
-        const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
-        update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
-        return;
-    }
-
-    const auto info = buffer_cache.UploadMemory(
-        actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
-    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
-}
-
 void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
     if (!state_tracker.TouchViewports()) {
         return;
@@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
         GetViewportState(device, regs, 8),  GetViewportState(device, regs, 9),
         GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
         GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
-        GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
+        GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
+    };
     scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
 }
 
@@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
     if (!state_tracker.TouchScissors()) {
         return;
     }
-    const std::array scissors = {
+    const std::array scissors{
         GetScissorState(regs, 0),  GetScissorState(regs, 1),  GetScissorState(regs, 2),
         GetScissorState(regs, 3),  GetScissorState(regs, 4),  GetScissorState(regs, 5),
         GetScissorState(regs, 6),  GetScissorState(regs, 7),  GetScissorState(regs, 8),
         GetScissorState(regs, 9),  GetScissorState(regs, 10), GetScissorState(regs, 11),
         GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
-        GetScissorState(regs, 15)};
+        GetScissorState(regs, 15),
+    };
     scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
 }
 
@@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
     });
 }
 
-size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
-    size_t size = CalculateVertexArraysSize();
-    if (is_indexed) {
-        size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
-    }
-    size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
-    return size;
-}
-
-size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
-    return Tegra::Engines::KeplerCompute::NumConstBuffers *
-           (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-}
-
-size_t RasterizerVulkan::CalculateVertexArraysSize() const {
-    const auto& regs = maxwell3d.regs;
-
-    size_t size = 0;
-    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        // This implementation assumes that all attributes are used in the shader.
-        const GPUVAddr start{regs.vertex_array[index].StartAddress()};
-        const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
-        DEBUG_ASSERT(end >= start);
-
-        size += (end - start) * regs.vertex_array[index].enable;
-    }
-    return size;
-}
-
-size_t RasterizerVulkan::CalculateIndexBufferSize() const {
-    return static_cast<size_t>(maxwell3d.regs.index_array.count) *
-           static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
-}
-
-size_t RasterizerVulkan::CalculateConstBufferSize(
-    const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
-    if (entry.IsIndirect()) {
-        // Buffer is accessed indirectly, so upload the entire thing
-        return buffer.size;
-    } else {
-        // Buffer is accessed directly, upload just what we use
-        return entry.GetSize();
-    }
-}
-
-VkBuffer RasterizerVulkan::DefaultBuffer() {
-    if (default_buffer) {
-        return *default_buffer;
-    }
-    default_buffer = device.GetLogical().CreateBuffer({
-        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .size = DEFAULT_BUFFER_SIZE,
-        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
-                 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
-        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-        .queueFamilyIndexCount = 0,
-        .pQueueFamilyIndices = nullptr,
-    });
-    default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
-
-    scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
-        cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
-    });
-    return *default_buffer;
-}
-
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 8e261b9bd..acea1ba2d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -18,14 +18,13 @@
 #include "video_core/renderer_vulkan/blit_image.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader/async_shaders.h"
@@ -49,7 +48,6 @@ namespace Vulkan {
 struct VKScreenInfo;
 
 class StateTracker;
-class BufferBindings;
 
 class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
 public:
@@ -65,6 +63,7 @@ public:
     void DispatchCompute(GPUVAddr code_addr) override;
     void ResetCounter(VideoCore::QueryType type) override;
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
     bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -107,24 +106,11 @@ private:
 
     static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
 
-    struct DrawParameters {
-        void Draw(vk::CommandBuffer cmdbuf) const;
-
-        u32 base_instance = 0;
-        u32 num_instances = 0;
-        u32 base_vertex = 0;
-        u32 num_vertices = 0;
-        bool is_indexed = 0;
-    };
-
     void FlushWork();
 
-    /// Setups geometry buffers and state.
-    DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
-                                 bool is_indexed, bool is_instanced);
-
     /// Setup descriptors in the graphics pipeline.
-    void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
+    void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
+                                bool is_indexed);
 
     void UpdateDynamicStates();
 
@@ -132,16 +118,6 @@ private:
 
     void EndTransformFeedback();
 
-    void SetupVertexArrays(BufferBindings& buffer_bindings);
-
-    void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
-
-    /// Setup constant buffers in the graphics pipeline.
-    void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
-
-    /// Setup global buffers in the graphics pipeline.
-    void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
-
     /// Setup uniform texels in the graphics pipeline.
     void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
 
@@ -154,12 +130,6 @@ private:
     /// Setup images in the graphics pipeline.
     void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
 
-    /// Setup constant buffers in the compute pipeline.
-    void SetupComputeConstBuffers(const ShaderEntries& entries);
-
-    /// Setup global buffers in the compute pipeline.
-    void SetupComputeGlobalBuffers(const ShaderEntries& entries);
-
     /// Setup texel buffers in the compute pipeline.
     void SetupComputeUniformTexels(const ShaderEntries& entries);
 
@@ -172,11 +142,6 @@ private:
     /// Setup images in the compute pipeline.
     void SetupComputeImages(const ShaderEntries& entries);
 
-    void SetupConstBuffer(const ConstBufferEntry& entry,
-                          const Tegra::Engines::ConstBufferInfo& buffer);
-
-    void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
-
     void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
     void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
     void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,19 +158,6 @@ private:
     void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
     void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
 
-    size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
-
-    size_t CalculateComputeStreamBufferSize() const;
-
-    size_t CalculateVertexArraysSize() const;
-
-    size_t CalculateIndexBufferSize() const;
-
-    size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
-                                    const Tegra::Engines::ConstBufferInfo& buffer) const;
-
-    VkBuffer DefaultBuffer();
-
     Tegra::GPU& gpu;
     Tegra::MemoryManager& gpu_memory;
     Tegra::Engines::Maxwell3D& maxwell3d;
@@ -217,24 +169,21 @@ private:
     StateTracker& state_tracker;
     VKScheduler& scheduler;
 
-    VKStreamBuffer stream_buffer;
     StagingBufferPool staging_pool;
     VKDescriptorPool descriptor_pool;
     VKUpdateDescriptorQueue update_descriptor_queue;
     BlitImageHelper blit_image;
-    QuadArrayPass quad_array_pass;
-    QuadIndexedPass quad_indexed_pass;
-    Uint8Pass uint8_pass;
+
+    GraphicsPipelineCacheKey graphics_key;
 
     TextureCacheRuntime texture_cache_runtime;
     TextureCache texture_cache;
+    BufferCacheRuntime buffer_cache_runtime;
+    BufferCache buffer_cache;
     VKPipelineCache pipeline_cache;
-    VKBufferCache buffer_cache;
     VKQueryCache query_cache;
     VKFenceManager fence_manager;
 
-    vk::Buffer default_buffer;
-    MemoryCommit default_buffer_commit;
     vk::Event wfi_event;
     VideoCommon::Shader::AsyncShaders async_shaders;
 
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index ee274ac59..a8bf7bda8 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -17,21 +17,21 @@ ResourcePool::~ResourcePool() = default;
 size_t ResourcePool::CommitResource() {
     // Refresh semaphore to query updated results
     master_semaphore.Refresh();
-
-    const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> {
+    const u64 gpu_tick = master_semaphore.KnownGpuTick();
+    const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
         for (size_t iterator = begin; iterator < end; ++iterator) {
-            if (master_semaphore.IsFree(ticks[iterator])) {
+            if (gpu_tick >= ticks[iterator]) {
                 ticks[iterator] = master_semaphore.CurrentTick();
                 return iterator;
             }
         }
-        return {};
+        return std::nullopt;
     };
     // Try to find a free resource from the hinted position to the end.
-    auto found = search(free_iterator, ticks.size());
+    std::optional<size_t> found = search(hint_iterator, ticks.size());
     if (!found) {
         // Search from beginning to the hinted position.
-        found = search(0, free_iterator);
+        found = search(0, hint_iterator);
         if (!found) {
             // Both searches failed, the pool is full; handle it.
             const size_t free_resource = ManageOverflow();
@@ -41,7 +41,7 @@ size_t ResourcePool::CommitResource() {
         }
     }
     // Free iterator is hinted to the resource after the one that's been commited.
-    free_iterator = (*found + 1) % ticks.size();
+    hint_iterator = (*found + 1) % ticks.size();
     return *found;
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index a018c7ec2..9d0bb3b4d 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -36,7 +36,7 @@ private:
 
     MasterSemaphore& master_semaphore;
     size_t grow_step = 0;     ///< Number of new resources created after an overflow
-    size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
+    size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found
     std::vector<u64> ticks;   ///< Ticks for each resource
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 66004f9c0..f35c120b0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
     worker_thread.join();
 }
 
-u64 VKScheduler::CurrentTick() const noexcept {
-    return master_semaphore->CurrentTick();
-}
-
-bool VKScheduler::IsFree(u64 tick) const noexcept {
-    return master_semaphore->IsFree(tick);
-}
-
-void VKScheduler::Wait(u64 tick) {
-    master_semaphore->Wait(tick);
-}
-
 void VKScheduler::Flush(VkSemaphore semaphore) {
     SubmitExecution(semaphore);
     AllocateNewContext();
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                                    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                                    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-                               VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
+                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
                                vk::Span(barriers.data(), num_images));
     });
     state.renderpass = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 15f2987eb..3ce48e9d2 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -14,6 +14,7 @@
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "common/threadsafe_queue.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
@@ -21,7 +22,6 @@ namespace Vulkan {
 class CommandPool;
 class Device;
 class Framebuffer;
-class MasterSemaphore;
 class StateTracker;
 class VKQueryCache;
 
@@ -32,15 +32,6 @@ public:
     explicit VKScheduler(const Device& device, StateTracker& state_tracker);
     ~VKScheduler();
 
-    /// Returns the current command buffer tick.
-    [[nodiscard]] u64 CurrentTick() const noexcept;
-
-    /// Returns true when a tick has been triggered by the GPU.
-    [[nodiscard]] bool IsFree(u64 tick) const noexcept;
-
-    /// Waits for the given tick to trigger on the GPU.
-    void Wait(u64 tick);
-
     /// Sends the current execution context to the GPU.
     void Flush(VkSemaphore semaphore = nullptr);
 
@@ -82,6 +73,21 @@ public:
         (void)chunk->Record(command);
     }
 
+    /// Returns the current command buffer tick.
+    [[nodiscard]] u64 CurrentTick() const noexcept {
+        return master_semaphore->CurrentTick();
+    }
+
+    /// Returns true when a tick has been triggered by the GPU.
+    [[nodiscard]] bool IsFree(u64 tick) const noexcept {
+        return master_semaphore->IsFree(tick);
+    }
+
+    /// Waits for the given tick to trigger on the GPU.
+    void Wait(u64 tick) {
+        master_semaphore->Wait(tick);
+    }
+
     /// Returns the master timeline semaphore.
     [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
         return *master_semaphore;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 61d52b961..40e2e0d38 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -3106,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
         entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
     }
     for (const auto& [base, usage] : ir.GetGlobalMemory()) {
-        entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written);
+        entries.global_buffers.emplace_back(GlobalBufferEntry{
+            .cbuf_index = base.cbuf_index,
+            .cbuf_offset = base.cbuf_offset,
+            .is_written = usage.is_written,
+        });
     }
     for (const auto& sampler : ir.GetSamplers()) {
         if (sampler.is_buffer) {
@@ -3127,6 +3131,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
             entries.attributes.insert(GetGenericAttributeLocation(attribute));
         }
     }
+    for (const auto& buffer : entries.const_buffers) {
+        entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
+    }
     entries.clip_distances = ir.GetClipDistances();
     entries.shader_length = ir.GetLength();
     entries.uses_warps = ir.UsesWarps();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 26381e444..5d94132a5 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -39,24 +39,7 @@ private:
     u32 index{};
 };
 
-class GlobalBufferEntry {
-public:
-    constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
-        : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
-
-    constexpr u32 GetCbufIndex() const {
-        return cbuf_index;
-    }
-
-    constexpr u32 GetCbufOffset() const {
-        return cbuf_offset;
-    }
-
-    constexpr bool IsWritten() const {
-        return is_written;
-    }
-
-private:
+struct GlobalBufferEntry {
     u32 cbuf_index{};
     u32 cbuf_offset{};
     bool is_written{};
@@ -78,6 +61,7 @@ struct ShaderEntries {
     std::set<u32> attributes;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
     std::size_t shader_length{};
+    u32 enabled_uniform_buffers{};
     bool uses_warps{};
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..7a1232497 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@
 
 #include <fmt/format.h>
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
@@ -17,18 +18,119 @@
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
+namespace {
+// Maximum potential alignment of a Vulkan buffer
+constexpr VkDeviceSize MAX_ALIGNMENT = 256;
+// Maximum size to put elements in the stream buffer
+constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
+// Stream buffer size in bytes
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
+
+constexpr VkMemoryPropertyFlags HOST_FLAGS =
+    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
+
+bool IsStreamHeap(VkMemoryHeap heap) noexcept {
+    return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
+}
+
+std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
+                                       VkMemoryPropertyFlags flags) noexcept {
+    // Scans the memory types in ascending index order and returns the first one that:
+    //  - has its bit set in type_mask,
+    //  - exposes every property bit requested in flags, and
+    //  - lives in a heap accepted by IsStreamHeap.
+    // Yields std::nullopt when no memory type passes all three checks.
+    for (u32 index = 0; index < props.memoryTypeCount; ++index) {
+        const bool is_compatible = ((type_mask >> index) & 1) != 0;
+        if (!is_compatible) {
+            continue;
+        }
+        const VkMemoryType& candidate = props.memoryTypes[index];
+        const bool has_flags = (candidate.propertyFlags & flags) == flags;
+        if (has_flags && IsStreamHeap(props.memoryHeaps[candidate.heapIndex])) {
+            // All requirements are met
+            return index;
+        }
+    }
+    return std::nullopt;
+}
+
+u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
+    // First choice: host visible memory that is also DEVICE_LOCAL. Nvidia and AMD expose a
+    // dedicated heap with these properties.
+    if (const std::optional<u32> local = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS)) {
+        return *local;
+    }
+    // Second choice: plain host visible and coherent memory.
+    if (const std::optional<u32> host = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS)) {
+        return *host;
+    }
+    // No memory type can hold the stream buffer. This is not expected to happen; when it
+    // does, report it to the caller as an out of device memory error.
+    throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+}
+
+size_t Region(size_t iterator) noexcept {
+    return iterator / REGION_SIZE;
+}
+} // Anonymous namespace
 
 StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
                                      VKScheduler& scheduler_)
-    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {}
+    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+    const vk::Device& dev = device.GetLogical();
+    stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = STREAM_BUFFER_SIZE,
+        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
+    if (device.HasDebuggingToolAttached()) {
+        stream_buffer.SetObjectNameEXT("Stream Buffer");
+    }
+    VkMemoryDedicatedRequirements dedicated_reqs{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+        .pNext = nullptr,
+        .prefersDedicatedAllocation = VK_FALSE,
+        .requiresDedicatedAllocation = VK_FALSE,
+    };
+    const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
+    const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
+                                dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
+    const VkMemoryDedicatedAllocateInfo dedicated_info{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .image = nullptr,
+        .buffer = *stream_buffer,
+    };
+    const auto memory_properties = device.GetPhysical().GetMemoryProperties();
+    stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = make_dedicated ? &dedicated_info : nullptr,
+        .allocationSize = requirements.size,
+        .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
+    });
+    if (device.HasDebuggingToolAttached()) {
+        stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+    }
+    stream_buffer.BindMemory(*stream_memory, 0);
+    stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+}
 
 StagingBufferPool::~StagingBufferPool() = default;
 
 StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
-    if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
-        return *ref;
+    if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+        return GetStreamBuffer(size);
     }
-    return CreateStagingBuffer(size, usage);
+    return GetStagingBuffer(size, usage);
 }
 
 void StagingBufferPool::TickFrame() {
@@ -39,6 +141,52 @@ void StagingBufferPool::TickFrame() {
     ReleaseCache(MemoryUsage::Download);
 }
 
+StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
+    if (AreRegionsActive(Region(free_iterator) + 1,
+                         std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
+        // Avoid waiting for the previous usages to be free
+        return GetStagingBuffer(size, MemoryUsage::Upload);
+    }
+    const u64 current_tick = scheduler.CurrentTick();
+    std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
+              current_tick);
+    used_iterator = iterator;
+    free_iterator = std::max(free_iterator, iterator + size);
+
+    if (iterator + size >= STREAM_BUFFER_SIZE) {
+        std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
+                  current_tick);
+        used_iterator = 0;
+        iterator = 0;
+        free_iterator = size;
+
+        if (AreRegionsActive(0, Region(size) + 1)) {
+            // Avoid waiting for the previous usages to be free
+            return GetStagingBuffer(size, MemoryUsage::Upload);
+        }
+    }
+    const size_t offset = iterator;
+    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+    return StagingBufferRef{
+        .buffer = *stream_buffer,
+        .offset = static_cast<VkDeviceSize>(offset),
+        .mapped_span = std::span<u8>(stream_pointer + offset, size),
+    };
+}
+
+bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
+    const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
+    return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
+                       [gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
+}
+
+StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
+    // Prefer recycling a buffer already reserved for this size and usage; a new staging
+    // buffer is only created when TryGetReservedBuffer yields nothing.
+    const std::optional<StagingBufferRef> reserved = TryGetReservedBuffer(size, usage);
+    return reserved ? *reserved : CreateStagingBuffer(size, usage);
+}
+
 std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
                                                                         MemoryUsage usage) {
     StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d42918a47..69f7618de 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -19,11 +19,14 @@ class VKScheduler;
 
 struct StagingBufferRef {
     VkBuffer buffer;
+    VkDeviceSize offset;
     std::span<u8> mapped_span;
 };
 
 class StagingBufferPool {
 public:
+    static constexpr size_t NUM_SYNCS = 16;
+
     explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
                                VKScheduler& scheduler);
     ~StagingBufferPool();
@@ -33,6 +36,11 @@ public:
     void TickFrame();
 
 private:
+    struct StreamBufferCommit {
+        size_t upper_bound;
+        u64 tick;
+    };
+
     struct StagingBuffer {
         vk::Buffer buffer;
         MemoryCommit commit;
@@ -42,6 +50,7 @@ private:
         StagingBufferRef Ref() const noexcept {
             return {
                 .buffer = *buffer,
+                .offset = 0,
                 .mapped_span = mapped_span,
             };
         }
@@ -56,6 +65,12 @@ private:
     static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
     using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
 
+    StagingBufferRef GetStreamBuffer(size_t size);
+
+    bool AreRegionsActive(size_t region_begin, size_t region_end) const;
+
+    StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
+
     std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
 
     StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
@@ -70,6 +85,15 @@ private:
     MemoryAllocator& memory_allocator;
     VKScheduler& scheduler;
 
+    vk::Buffer stream_buffer;
+    vk::DeviceMemory stream_memory;
+    u8* stream_pointer = nullptr;
+
+    size_t iterator = 0;
+    size_t used_iterator = 0;
+    size_t free_iterator = 0;
+    std::array<u64, NUM_SYNCS> sync_ticks{};
+
     StagingBuffersCache device_local_cache;
     StagingBuffersCache upload_cache;
     StagingBuffersCache download_cache;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 1779a2e30..956f86845 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -18,9 +18,7 @@
 #define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
 
 namespace Vulkan {
-
 namespace {
-
 using namespace Dirty;
 using namespace VideoCommon::Dirty;
 using Tegra::Engines::Maxwell3D;
@@ -30,15 +28,18 @@ using Table = Maxwell3D::DirtyState::Table;
 using Flags = Maxwell3D::DirtyState::Flags;
 
 Flags MakeInvalidationFlags() {
-    static constexpr std::array INVALIDATION_FLAGS{
+    static constexpr int INVALIDATION_FLAGS[]{
         Viewports,         Scissors,  DepthBias,         BlendConstants,    DepthBounds,
         StencilProperties, CullMode,  DepthBoundsEnable, DepthTestEnable,   DepthWriteEnable,
-        DepthCompareOp,    FrontFace, StencilOp,         StencilTestEnable,
+        DepthCompareOp,    FrontFace, StencilOp,         StencilTestEnable, VertexBuffers,
     };
     Flags flags{};
     for (const int flag : INVALIDATION_FLAGS) {
         flags[flag] = true;
     }
+    for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
+        flags[index] = true;
+    }
     return flags;
 }
 
@@ -125,12 +126,40 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
     tables[0][OFF(stencil_enable)] = StencilTestEnable;
 }
 
+void SetupDirtyBlending(Tables& tables) {
+    tables[0][OFF(color_mask_common)] = Blending;
+    tables[0][OFF(independent_blend_enable)] = Blending;
+    FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
+    FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
+    FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
+}
+
+void SetupDirtyInstanceDivisors(Tables& tables) {
+    static constexpr size_t divisor_offset = 3;
+    for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
+        tables[0][OFF(instanced_arrays) + index] = InstanceDivisors;
+        tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] =
+            InstanceDivisors;
+    }
+}
+
+void SetupDirtyVertexAttributes(Tables& tables) {
+    FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes);
+}
+
+void SetupDirtyViewportSwizzles(Tables& tables) {
+    static constexpr size_t swizzle_offset = 6;
+    for (size_t index = 0; index < Regs::NumViewports; ++index) {
+        tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
+            ViewportSwizzles;
+    }
+}
 } // Anonymous namespace
 
 StateTracker::StateTracker(Tegra::GPU& gpu)
     : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
     auto& tables = gpu.Maxwell3D().dirty.tables;
-    SetupDirtyRenderTargets(tables);
+    SetupDirtyFlags(tables);
     SetupDirtyViewports(tables);
     SetupDirtyScissors(tables);
     SetupDirtyDepthBias(tables);
@@ -145,6 +174,10 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
     SetupDirtyFrontFace(tables);
     SetupDirtyStencilOp(tables);
     SetupDirtyStencilTestEnable(tables);
+    SetupDirtyBlending(tables);
+    SetupDirtyInstanceDivisors(tables);
+    SetupDirtyVertexAttributes(tables);
+    SetupDirtyViewportSwizzles(tables);
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index c335d2bdf..84e918a71 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -35,6 +35,11 @@ enum : u8 {
     StencilOp,
     StencilTestEnable,
 
+    Blending,
+    InstanceDivisors,
+    VertexAttributes,
+    ViewportSwizzles,
+
     Last
 };
 static_assert(Last <= std::numeric_limits<u8>::max());
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 725a2a05d..0b63bd6c8 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
 
 } // Anonymous namespace
 
-VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_)
-    : surface{surface_}, device{device_}, scheduler{scheduler_} {}
+VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
+                         u32 width, u32 height, bool srgb)
+    : surface{surface_}, device{device_}, scheduler{scheduler_} {
+    Create(width, height, srgb);
+}
 
 VKSwapchain::~VKSwapchain() = default;
 
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 2eadd62b3..a728511e0 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -20,7 +20,8 @@ class VKScheduler;
 
 class VKSwapchain {
 public:
-    explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler);
+    explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
+                         u32 width, u32 height, bool srgb);
     ~VKSwapchain();
 
     /// Creates (or recreates) the swapchain with a given size.
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index aa7c5d7c6..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
 void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
                        VkImageAspectFlags aspect_mask, bool is_initialized,
                        std::span<const VkBufferImageCopy> copies) {
-    static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
-                                                  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
-                                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
+        VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
+                                                       VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                                                       VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
     const VkImageMemoryBarrier read_barrier{
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
-        .srcAccessMask = ACCESS_FLAGS,
+        .srcAccessMask = WRITE_ACCESS_FLAGS,
         .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
         .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = image,
-        .subresourceRange =
-            {
-                .aspectMask = aspect_mask,
-                .baseMipLevel = 0,
-                .levelCount = VK_REMAINING_MIP_LEVELS,
-                .baseArrayLayer = 0,
-                .layerCount = VK_REMAINING_ARRAY_LAYERS,
-            },
+        .subresourceRange{
+            .aspectMask = aspect_mask,
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
     };
     const VkImageMemoryBarrier write_barrier{
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-        .dstAccessMask = ACCESS_FLAGS,
+        .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
         .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
         .newLayout = VK_IMAGE_LAYOUT_GENERAL,
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = image,
-        .subresourceRange =
-            {
-                .aspectMask = aspect_mask,
-                .baseMipLevel = 0,
-                .levelCount = VK_REMAINING_MIP_LEVELS,
-                .baseArrayLayer = 0,
-                .layerCount = VK_REMAINING_ARRAY_LAYERS,
-            },
+        .subresourceRange{
+            .aspectMask = aspect_mask,
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
     };
     cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                            read_barrier);
@@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() {
     scheduler.Finish();
 }
 
-ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
-    const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
-    return {
-        .handle = staging_ref.buffer,
-        .span = staging_ref.mapped_span,
-    };
+StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+    return staging_buffer_pool.Request(size, MemoryUsage::Upload);
 }
 
-ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
-    const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download);
-    return {
-        .handle = staging_ref.buffer,
-        .span = staging_ref.mapped_span,
-    };
+StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+    return staging_buffer_pool.Request(size, MemoryUsage::Download);
 }
 
 void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                  VK_ACCESS_TRANSFER_WRITE_BIT,
-                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
                 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
             VkImageMemoryBarrier{
                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                 .pNext = nullptr,
-                .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
-                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
-                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
-                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
-                                 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+                                 VK_ACCESS_TRANSFER_WRITE_BIT,
                 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
     }
 }
 
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
-                         std::span<const BufferImageCopy> copies) {
+void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
     // TODO: Move this to another API
     scheduler->RequestOutsideRenderPassOperationContext();
-    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
-    const VkBuffer src_buffer = map.handle;
+    std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+    const VkBuffer src_buffer = map.buffer;
     const VkImage vk_image = *image;
     const VkImageAspectFlags vk_aspect_mask = aspect_mask;
     const bool is_initialized = std::exchange(initialized, true);
@@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
     });
 }
 
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const StagingBufferRef& map,
                          std::span<const VideoCommon::BufferCopy> copies) {
     // TODO: Move this to another API
     scheduler->RequestOutsideRenderPassOperationContext();
-    std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
-    const VkBuffer src_buffer = map.handle;
+    std::vector vk_copies = TransformBufferCopies(copies, map.offset);
+    const VkBuffer src_buffer = map.buffer;
     const VkBuffer dst_buffer = *buffer;
     scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
         // TODO: Barriers
@@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
     });
 }
 
-void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
-                           std::span<const BufferImageCopy> copies) {
-    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
-    scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
+void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+    std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+    scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
                        vk_copies](vk::CommandBuffer cmdbuf) {
-        // TODO: Barriers
-        cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
+        const VkImageMemoryBarrier read_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = image,
+            .subresourceRange{
+                .aspectMask = aspect_mask,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+        const VkImageMemoryBarrier image_write_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = 0,
+            .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = image,
+            .subresourceRange{
+                .aspectMask = aspect_mask,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+        const VkMemoryBarrier memory_write_barrier{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        };
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               0, read_barrier);
+        cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               0, memory_write_barrier, nullptr, image_write_barrier);
     });
 }
 
@@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
         .pAttachments = attachments.data(),
         .width = key.size.width,
         .height = key.size.height,
-        .layers = static_cast<u32>(num_layers),
+        .layers = static_cast<u32>(std::max(num_layers, 1)),
     });
     if (runtime.device.HasDebuggingToolAttached()) {
         framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 8d29361a1..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
 #include <compare>
 #include <span>
 
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/texture_cache/texture_cache.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
 
 namespace Vulkan {
 
-struct ImageBufferMap {
-    [[nodiscard]] VkBuffer Handle() const noexcept {
-        return handle;
-    }
-
-    [[nodiscard]] std::span<u8> Span() const noexcept {
-        return span;
-    }
-
-    VkBuffer handle;
-    std::span<u8> span;
-};
-
 struct TextureCacheRuntime {
     const Device& device;
     VKScheduler& scheduler;
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
 
     void Finish();
 
-    [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
+    [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
 
-    [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size);
+    [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
 
     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                    const std::array<Offset2D, 2>& dst_region,
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
         return false;
     }
 
-    void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
+    void AccelerateImageUpload(Image&, const StagingBufferRef&,
                                std::span<const VideoCommon::SwizzleParameters>) {
         UNREACHABLE();
     }
@@ -112,13 +100,12 @@ public:
     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                    VAddr cpu_addr);
 
-    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+    void UploadMemory(const StagingBufferRef& map,
                       std::span<const VideoCommon::BufferImageCopy> copies);
 
-    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
-                      std::span<const VideoCommon::BufferCopy> copies);
+    void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
 
-    void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
+    void DownloadMemory(const StagingBufferRef& map,
                         std::span<const VideoCommon::BufferImageCopy> copies);
 
     [[nodiscard]] VkImage Handle() const noexcept {
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 3b40db9bc..02adcf9c7 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -64,6 +64,7 @@ void AsyncShaders::FreeWorkers() {
 
 void AsyncShaders::KillWorkers() {
     is_thread_exiting.store(true);
+    cv.notify_all();
     for (auto& thread : worker_threads) {
         thread.detach();
     }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 0dbb1a31f..7fdff6e56 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -9,16 +9,7 @@
 #include <shared_mutex>
 #include <thread>
 
-// This header includes both Vulkan and OpenGL headers, this has to be fixed
-// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
-// Forcefully include glad early and undefine macros
 #include <glad/glad.h>
-#ifdef CreateEvent
-#undef CreateEvent
-#endif
-#ifdef CreateSemaphore
-#undef CreateSemaphore
-#endif
 
 #include "common/common_types.h"
 #include "video_core/renderer_opengl/gl_device.h"
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d3ea07aac..5f88537bc 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
+                uses_y_negate = true;
                 return Operation(OperationCode::YNegate);
             case SystemVariable::InvocationInfo:
                 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0c6ab0f07..1cd7c14d7 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -139,6 +139,10 @@ public:
         return uses_legacy_varyings;
     }
 
+    bool UsesYNegate() const {
+        return uses_y_negate;
+    }
+
     bool UsesWarps() const {
         return uses_warps;
     }
@@ -465,6 +469,7 @@ private:
     bool uses_instance_id{};
     bool uses_vertex_id{};
     bool uses_legacy_varyings{};
+    bool uses_y_negate{};
     bool uses_warps{};
     bool uses_indexed_samplers{};
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d1080300f..b1da69971 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -103,9 +103,6 @@ public:
     /// Notify the cache that a new frame has been queued
     void TickFrame();
 
-    /// Return an unique mutually exclusive lock for the cache
-    [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
-
     /// Return a constant reference to the given image view id
     [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
 
@@ -179,6 +176,8 @@ public:
     /// Return true when a CPU region is modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
 
+    std::mutex mutex;
+
 private:
     /// Iterate over all page indices in a range
     template <typename Func>
@@ -212,8 +211,8 @@ private:
     void RefreshContents(Image& image);
 
     /// Upload data from guest to an image
-    template <typename MapBuffer>
-    void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
+    template <typename StagingBuffer>
+    void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
 
     /// Find or create an image view from a guest descriptor
     [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
@@ -325,8 +324,6 @@ private:
 
     RenderTargets render_targets;
 
-    std::mutex mutex;
-
     std::unordered_map<TICEntry, ImageViewId> image_views;
     std::unordered_map<TSCEntry, SamplerId> samplers;
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
@@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() {
 }
 
 template <class P>
-std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
-    return std::unique_lock{mutex};
-}
-
-template <class P>
 const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
     return slot_image_views[id];
 }
@@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
     });
     for (const ImageId image_id : images) {
         Image& image = slot_images[image_id];
-        auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
+        auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
         const auto copies = FullDownloadCopies(image.info);
-        image.DownloadMemory(map, 0, copies);
+        image.DownloadMemory(map, copies);
         runtime.Finish();
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
     }
 }
 
@@ -757,25 +749,25 @@ void TextureCache<P>::PopAsyncFlushes() {
     for (const ImageId image_id : download_ids) {
         total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
     }
-    auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
-    size_t buffer_offset = 0;
+    auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
+    const size_t original_offset = download_map.offset;
     for (const ImageId image_id : download_ids) {
         Image& image = slot_images[image_id];
         const auto copies = FullDownloadCopies(image.info);
-        image.DownloadMemory(download_map, buffer_offset, copies);
-        buffer_offset += image.unswizzled_size_bytes;
+        image.DownloadMemory(download_map, copies);
+        download_map.offset += image.unswizzled_size_bytes;
     }
     // Wait for downloads to finish
     runtime.Finish();
 
-    buffer_offset = 0;
-    const std::span<u8> download_span = download_map.Span();
+    download_map.offset = original_offset;
+    std::span<u8> download_span = download_map.mapped_span;
     for (const ImageId image_id : download_ids) {
         const ImageBase& image = slot_images[image_id];
         const auto copies = FullDownloadCopies(image.info);
-        const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
-        buffer_offset += image.unswizzled_size_bytes;
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+        download_map.offset += image.unswizzled_size_bytes;
+        download_span = download_span.subspan(image.unswizzled_size_bytes);
     }
     committed_downloads.pop();
 }
@@ -806,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) {
         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
         return;
     }
-    auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
-    UploadImageContents(image, map, 0);
+    auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
+    UploadImageContents(image, staging);
     runtime.InsertUploadMemoryBarrier();
 }
 
 template <class P>
-template <typename MapBuffer>
-void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
-    const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
+template <typename StagingBuffer>
+void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
+    const std::span<u8> mapped_span = staging.mapped_span;
     const GPUVAddr gpu_addr = image.gpu_addr;
 
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
         gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
         const auto uploads = FullUploadSwizzles(image.info);
-        runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
+        runtime.AccelerateImageUpload(image, staging, uploads);
     } else if (True(image.flags & ImageFlagBits::Converted)) {
         std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
         auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
         ConvertImage(unswizzled_data, image.info, mapped_span, copies);
-        image.UploadMemory(map, buffer_offset, copies);
+        image.UploadMemory(staging, copies);
     } else if (image.info.type == ImageType::Buffer) {
         const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
-        image.UploadMemory(map, buffer_offset, copies);
+        image.UploadMemory(staging, copies);
     } else {
         const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
-        image.UploadMemory(map, buffer_offset, copies);
+        image.UploadMemory(staging, copies);
     }
 }
 
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 53444e945..e1b38c6ac 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -38,19 +38,18 @@ namespace VideoCore {
 
 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
     const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
-    std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
-        system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);
-
+    const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+    auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
     auto context = emu_window.CreateSharedContext();
-    const auto scope = context->Acquire();
-
-    auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
-    if (!renderer->Init()) {
+    auto scope = context->Acquire();
+    try {
+        auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
+        gpu->BindRenderer(std::move(renderer));
+        return gpu;
+    } catch (const std::runtime_error& exception) {
+        LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
         return nullptr;
     }
-
-    gpu->BindRenderer(std::move(renderer));
-    return gpu;
 }
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 51f53bc39..34d396434 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -18,27 +18,22 @@
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
-
 namespace {
-
 namespace Alternatives {
-
-constexpr std::array Depth24UnormS8_UINT{
+constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{
     VK_FORMAT_D32_SFLOAT_S8_UINT,
     VK_FORMAT_D16_UNORM_S8_UINT,
-    VkFormat{},
+    VK_FORMAT_UNDEFINED,
 };
 
-constexpr std::array Depth16UnormS8_UINT{
+constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
     VK_FORMAT_D24_UNORM_S8_UINT,
     VK_FORMAT_D32_SFLOAT_S8_UINT,
-    VkFormat{},
+    VK_FORMAT_UNDEFINED,
 };
-
 } // namespace Alternatives
 
 constexpr std::array REQUIRED_EXTENSIONS{
-    VK_KHR_SWAPCHAIN_EXTENSION_NAME,
     VK_KHR_MAINTENANCE1_EXTENSION_NAME,
     VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
     VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
@@ -51,7 +46,14 @@ constexpr std::array REQUIRED_EXTENSIONS{
     VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
     VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
     VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
+    VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
     VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
+#ifdef _WIN32
+    VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
+#endif
+#ifdef __linux__
+    VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
+#endif
 };
 
 template <typename T>
@@ -63,9 +65,9 @@ void SetNext(void**& next, T& data) {
 constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
     switch (format) {
     case VK_FORMAT_D24_UNORM_S8_UINT:
-        return Alternatives::Depth24UnormS8_UINT.data();
+        return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
     case VK_FORMAT_D16_UNORM_S8_UINT:
-        return Alternatives::Depth16UnormS8_UINT.data();
+        return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
     default:
         return nullptr;
     }
@@ -195,78 +197,77 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
                const vk::InstanceDispatch& dld_)
     : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
       format_properties{GetFormatProperties(physical)} {
-    CheckSuitability();
+    CheckSuitability(surface != nullptr);
     SetupFamilies(surface);
     SetupFeatures();
 
     const auto queue_cis = GetDeviceQueueCreateInfos();
-    const std::vector extensions = LoadExtensions();
+    const std::vector extensions = LoadExtensions(surface != nullptr);
 
     VkPhysicalDeviceFeatures2 features2{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
         .pNext = nullptr,
-        .features{},
+        .features{
+            .robustBufferAccess = true,
+            .fullDrawIndexUint32 = false,
+            .imageCubeArray = true,
+            .independentBlend = true,
+            .geometryShader = true,
+            .tessellationShader = true,
+            .sampleRateShading = false,
+            .dualSrcBlend = false,
+            .logicOp = false,
+            .multiDrawIndirect = false,
+            .drawIndirectFirstInstance = false,
+            .depthClamp = true,
+            .depthBiasClamp = true,
+            .fillModeNonSolid = false,
+            .depthBounds = false,
+            .wideLines = false,
+            .largePoints = true,
+            .alphaToOne = false,
+            .multiViewport = true,
+            .samplerAnisotropy = true,
+            .textureCompressionETC2 = false,
+            .textureCompressionASTC_LDR = is_optimal_astc_supported,
+            .textureCompressionBC = false,
+            .occlusionQueryPrecise = true,
+            .pipelineStatisticsQuery = false,
+            .vertexPipelineStoresAndAtomics = true,
+            .fragmentStoresAndAtomics = true,
+            .shaderTessellationAndGeometryPointSize = false,
+            .shaderImageGatherExtended = true,
+            .shaderStorageImageExtendedFormats = false,
+            .shaderStorageImageMultisample = is_shader_storage_image_multisample,
+            .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
+            .shaderStorageImageWriteWithoutFormat = true,
+            .shaderUniformBufferArrayDynamicIndexing = false,
+            .shaderSampledImageArrayDynamicIndexing = false,
+            .shaderStorageBufferArrayDynamicIndexing = false,
+            .shaderStorageImageArrayDynamicIndexing = false,
+            .shaderClipDistance = false,
+            .shaderCullDistance = false,
+            .shaderFloat64 = false,
+            .shaderInt64 = false,
+            .shaderInt16 = false,
+            .shaderResourceResidency = false,
+            .shaderResourceMinLod = false,
+            .sparseBinding = false,
+            .sparseResidencyBuffer = false,
+            .sparseResidencyImage2D = false,
+            .sparseResidencyImage3D = false,
+            .sparseResidency2Samples = false,
+            .sparseResidency4Samples = false,
+            .sparseResidency8Samples = false,
+            .sparseResidency16Samples = false,
+            .sparseResidencyAliased = false,
+            .variableMultisampleRate = false,
+            .inheritedQueries = false,
+        },
     };
     const void* first_next = &features2;
     void** next = &features2.pNext;
 
-    features2.features = {
-        .robustBufferAccess = false,
-        .fullDrawIndexUint32 = false,
-        .imageCubeArray = true,
-        .independentBlend = true,
-        .geometryShader = true,
-        .tessellationShader = true,
-        .sampleRateShading = false,
-        .dualSrcBlend = false,
-        .logicOp = false,
-        .multiDrawIndirect = false,
-        .drawIndirectFirstInstance = false,
-        .depthClamp = true,
-        .depthBiasClamp = true,
-        .fillModeNonSolid = false,
-        .depthBounds = false,
-        .wideLines = false,
-        .largePoints = true,
-        .alphaToOne = false,
-        .multiViewport = true,
-        .samplerAnisotropy = true,
-        .textureCompressionETC2 = false,
-        .textureCompressionASTC_LDR = is_optimal_astc_supported,
-        .textureCompressionBC = false,
-        .occlusionQueryPrecise = true,
-        .pipelineStatisticsQuery = false,
-        .vertexPipelineStoresAndAtomics = true,
-        .fragmentStoresAndAtomics = true,
-        .shaderTessellationAndGeometryPointSize = false,
-        .shaderImageGatherExtended = true,
-        .shaderStorageImageExtendedFormats = false,
-        .shaderStorageImageMultisample = is_shader_storage_image_multisample,
-        .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
-        .shaderStorageImageWriteWithoutFormat = true,
-        .shaderUniformBufferArrayDynamicIndexing = false,
-        .shaderSampledImageArrayDynamicIndexing = false,
-        .shaderStorageBufferArrayDynamicIndexing = false,
-        .shaderStorageImageArrayDynamicIndexing = false,
-        .shaderClipDistance = false,
-        .shaderCullDistance = false,
-        .shaderFloat64 = false,
-        .shaderInt64 = false,
-        .shaderInt16 = false,
-        .shaderResourceResidency = false,
-        .shaderResourceMinLod = false,
-        .sparseBinding = false,
-        .sparseResidencyBuffer = false,
-        .sparseResidencyImage2D = false,
-        .sparseResidencyImage3D = false,
-        .sparseResidency2Samples = false,
-        .sparseResidency4Samples = false,
-        .sparseResidency8Samples = false,
-        .sparseResidency16Samples = false,
-        .sparseResidencyAliased = false,
-        .variableMultisampleRate = false,
-        .inheritedQueries = false,
-    };
     VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
         .pNext = nullptr,
@@ -379,20 +380,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
     }
 
-    VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
-    if (ext_robustness2) {
-        robustness2 = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
-            .pNext = nullptr,
-            .robustBufferAccess2 = false,
-            .robustImageAccess2 = true,
-            .nullDescriptor = true,
-        };
-        SetNext(next, robustness2);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
-    }
-
     if (!ext_depth_range_unrestricted) {
         LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
     }
@@ -535,16 +522,18 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
     return (supported_usage & wanted_usage) == wanted_usage;
 }
 
-void Device::CheckSuitability() const {
+void Device::CheckSuitability(bool requires_swapchain) const {
     std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
+    bool has_swapchain = false;
     for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
-        for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
+        const std::string_view name{property.extensionName};
+        for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
             if (available_extensions[i]) {
                 continue;
             }
-            const std::string_view name{property.extensionName};
             available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
         }
+        has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME;
     }
     for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
         if (available_extensions[i]) {
@@ -553,6 +542,11 @@ void Device::CheckSuitability() const {
         LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
         throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
     }
+    if (requires_swapchain && !has_swapchain) {
+        LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain");
+        throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
+    }
+
     struct LimitTuple {
         u32 minimum;
         u32 value;
@@ -572,9 +566,20 @@ void Device::CheckSuitability() const {
             throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
         }
     }
-    const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
+    VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
+    robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
+
+    VkPhysicalDeviceFeatures2 features2{};
+    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+    features2.pNext = &robustness2;
+
+    physical.GetFeatures2KHR(features2);
+
+    const VkPhysicalDeviceFeatures& features{features2.features};
     const std::array feature_report{
+        std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
         std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
+        std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
         std::make_pair(features.imageCubeArray, "imageCubeArray"),
         std::make_pair(features.independentBlend, "independentBlend"),
         std::make_pair(features.depthClamp, "depthClamp"),
@@ -589,6 +594,9 @@ void Device::CheckSuitability() const {
         std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
         std::make_pair(features.shaderStorageImageWriteWithoutFormat,
                        "shaderStorageImageWriteWithoutFormat"),
+        std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
+        std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
+        std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
     };
     for (const auto& [is_supported, name] : feature_report) {
         if (is_supported) {
@@ -599,17 +607,19 @@ void Device::CheckSuitability() const {
     }
 }
 
-std::vector<const char*> Device::LoadExtensions() {
+std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
     std::vector<const char*> extensions;
-    extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
+    extensions.reserve(8 + REQUIRED_EXTENSIONS.size());
     extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
+    if (requires_surface) {
+        extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+    }
 
     bool has_khr_shader_float16_int8{};
     bool has_ext_subgroup_size_control{};
     bool has_ext_transform_feedback{};
     bool has_ext_custom_border_color{};
     bool has_ext_extended_dynamic_state{};
-    bool has_ext_robustness2{};
     for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
         const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
                               bool push) {
@@ -637,14 +647,12 @@ std::vector<const char*> Device::LoadExtensions() {
         test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
         test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
         test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
-        test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
         test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
         if (Settings::values.renderer_debug) {
             test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
                  true);
         }
     }
-
     VkPhysicalDeviceFeatures2KHR features;
     features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
 
@@ -661,7 +669,6 @@ std::vector<const char*> Device::LoadExtensions() {
         is_float16_supported = float16_int8_features.shaderFloat16;
         extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
     }
-
     if (has_ext_subgroup_size_control) {
         VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
         subgroup_features.sType =
@@ -688,7 +695,6 @@ std::vector<const char*> Device::LoadExtensions() {
     } else {
         is_warp_potentially_bigger = true;
     }
-
     if (has_ext_transform_feedback) {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
         tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -710,7 +716,6 @@ std::vector<const char*> Device::LoadExtensions() {
             ext_transform_feedback = true;
         }
     }
-
     if (has_ext_custom_border_color) {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
         border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
@@ -723,7 +728,6 @@ std::vector<const char*> Device::LoadExtensions() {
             ext_custom_border_color = true;
         }
     }
-
     if (has_ext_extended_dynamic_state) {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
         dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
@@ -736,19 +740,6 @@ std::vector<const char*> Device::LoadExtensions() {
             ext_extended_dynamic_state = true;
         }
     }
-
-    if (has_ext_robustness2) {
-        VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
-        robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
-        robustness2.pNext = nullptr;
-        features.pNext = &robustness2;
-        physical.GetFeatures2KHR(features);
-        if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
-            extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
-            ext_robustness2 = true;
-        }
-    }
-
     return extensions;
 }
 
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 4b66dba7a..67d70cd22 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -23,7 +23,7 @@ enum class FormatType { Linear, Optimal, Buffer };
 const u32 GuestWarpSize = 32;
 
 /// Handles data specific to a physical device.
-class Device final {
+class Device {
 public:
     explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
                     const vk::InstanceDispatch& dld);
@@ -227,10 +227,10 @@ public:
 
 private:
     /// Checks if the physical device is suitable.
-    void CheckSuitability() const;
+    void CheckSuitability(bool requires_swapchain) const;
 
     /// Loads extensions into a vector and stores available ones in this object.
-    std::vector<const char*> LoadExtensions();
+    std::vector<const char*> LoadExtensions(bool requires_surface);
 
     /// Sets up queue families.
     void SetupFamilies(VkSurfaceKHR surface);
@@ -285,7 +285,6 @@ private:
     bool ext_transform_feedback{};              ///< Support for VK_EXT_transform_feedback.
     bool ext_custom_border_color{};             ///< Support for VK_EXT_custom_border_color.
     bool ext_extended_dynamic_state{};          ///< Support for VK_EXT_extended_dynamic_state.
-    bool ext_robustness2{};                     ///< Support for VK_EXT_robustness2.
     bool ext_shader_stencil_export{};           ///< Support for VK_EXT_shader_stencil_export.
     bool nv_device_diagnostics_config{};        ///< Support for VK_NV_device_diagnostics_config.
     bool has_renderdoc{};                       ///< Has RenderDoc attached
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index 889ecda0c..bfd6e6add 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <future>
 #include <optional>
 #include <span>
 #include <utility>
@@ -140,7 +141,10 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
                   VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version));
         throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
     }
-    vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld);
+    vk::Instance instance =
+        std::async([&] {
+            return vk::Instance::Create(required_version, layers, extensions, dld);
+        }).get();
     if (!vk::Load(*instance, dld)) {
         LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
         throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index d6eb3af31..2a8b7a907 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -7,6 +7,8 @@
 #include <optional>
 #include <vector>
 
+#include <glad/glad.h>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
@@ -55,10 +57,24 @@ struct Range {
 
 class MemoryAllocation {
 public:
-    explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_,
-                              VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
-        : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
-          property_flags{properties}, shifted_memory_type{1U << type} {}
+    explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
+                              u64 allocation_size_, u32 type)
+        : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
+          shifted_memory_type{1U << type} {}
+
+#if defined(_WIN32) || defined(__linux__)
+    ~MemoryAllocation() {
+        if (owning_opengl_handle != 0) {
+            glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
+        }
+    }
+#endif
+
+    MemoryAllocation& operator=(const MemoryAllocation&) = delete;
+    MemoryAllocation(const MemoryAllocation&) = delete;
+
+    MemoryAllocation& operator=(MemoryAllocation&&) = delete;
+    MemoryAllocation(MemoryAllocation&&) = delete;
 
     [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
         const std::optional<u64> alloc = FindFreeRegion(size, alignment);
@@ -88,6 +104,31 @@ public:
         return memory_mapped_span;
     }
 
+#ifdef _WIN32
+    [[nodiscard]] u32 ExportOpenGLHandle() {
+        if (!owning_opengl_handle) {
+            glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
+            glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
+                                         GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
+                                         memory.GetMemoryWin32HandleKHR());
+        }
+        return owning_opengl_handle;
+    }
+#elif __linux__
+    [[nodiscard]] u32 ExportOpenGLHandle() {
+        if (!owning_opengl_handle) {
+            glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
+            glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
+                                memory.GetMemoryFdKHR());
+        }
+        return owning_opengl_handle;
+    }
+#else
+    [[nodiscard]] u32 ExportOpenGLHandle() {
+        return 0;
+    }
+#endif
+
     /// Returns whether this allocation is compatible with the arguments.
     [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
         return (flags & property_flags) && (type_mask & shifted_memory_type) != 0;
@@ -118,13 +159,15 @@ private:
         return candidate;
     }
 
-    const Device& device;                       ///< Vulkan device.
     const vk::DeviceMemory memory;              ///< Vulkan memory allocation handler.
     const u64 allocation_size;                  ///< Size of this allocation.
     const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
     const u32 shifted_memory_type;              ///< Shifted Vulkan memory type.
     std::vector<Range> commits;                 ///< All commit ranges done from this allocation.
     std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
+#if defined(_WIN32) || defined(__linux__)
+    u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
+#endif
 };
 
 MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
@@ -156,14 +199,19 @@ std::span<u8> MemoryCommit::Map() {
     return span;
 }
 
+u32 MemoryCommit::ExportOpenGLHandle() const {
+    return allocation->ExportOpenGLHandle();
+}
+
 void MemoryCommit::Release() {
     if (allocation) {
         allocation->Free(begin);
     }
 }
 
-MemoryAllocator::MemoryAllocator(const Device& device_)
-    : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
+MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
+    : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
+      export_allocations{export_allocations_} {}
 
 MemoryAllocator::~MemoryAllocator() = default;
 
@@ -196,14 +244,24 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage)
 
 void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
     const u32 type = FindType(flags, type_mask).value();
+    const VkExportMemoryAllocateInfo export_allocate_info{
+        .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
+        .pNext = nullptr,
+#ifdef _WIN32
+        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+#elif __linux__
+        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+#else
+        .handleTypes = 0,
+#endif
+    };
     vk::DeviceMemory memory = device.GetLogical().AllocateMemory({
         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .pNext = nullptr,
+        .pNext = export_allocations ? &export_allocate_info : nullptr,
         .allocationSize = size,
         .memoryTypeIndex = type,
     });
-    allocations.push_back(
-        std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type));
+    allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
 }
 
 std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index 9e6cfabf9..d1ce29450 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -43,6 +43,9 @@ public:
     /// It will map the backing allocation if it hasn't been mapped before.
     std::span<u8> Map();
 
+    /// Returns a non-owning OpenGL handle, creating one if it doesn't exist.
+    u32 ExportOpenGLHandle() const;
+
     /// Returns the Vulkan memory handler.
     VkDeviceMemory Memory() const {
         return memory;
@@ -67,7 +70,15 @@ private:
 /// Allocates and releases memory allocations on demand.
 class MemoryAllocator {
 public:
-    explicit MemoryAllocator(const Device& device_);
+    /**
+     * Construct memory allocator
+     *
+     * @param device_             Device to allocate from
+     * @param export_allocations_ True when allocations have to be exported
+     *
+     * @throw vk::Exception on failure
+     */
+    explicit MemoryAllocator(const Device& device_, bool export_allocations_);
     ~MemoryAllocator();
 
     MemoryAllocator& operator=(const MemoryAllocator&) = delete;
@@ -106,8 +117,9 @@ private:
     /// Returns index to the fastest memory type compatible with the passed requirements.
     std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
 
-    const Device& device;                                       ///< Device handle.
-    const VkPhysicalDeviceMemoryProperties properties;          ///< Physical device properties.
+    const Device& device;                              ///< Device handle.
+    const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
+    const bool export_allocations; ///< True when memory allocations have to be exported.
     std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
 };
 
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 5e15ad607..2aa0ffbe6 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -168,11 +168,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkFreeCommandBuffers);
     X(vkFreeDescriptorSets);
     X(vkFreeMemory);
-    X(vkGetBufferMemoryRequirements);
+    X(vkGetBufferMemoryRequirements2);
     X(vkGetDeviceQueue);
     X(vkGetEventStatus);
     X(vkGetFenceStatus);
     X(vkGetImageMemoryRequirements);
+    X(vkGetMemoryFdKHR);
+#ifdef _WIN32
+    X(vkGetMemoryWin32HandleKHR);
+#endif
     X(vkGetQueryPoolResults);
     X(vkGetSemaphoreCounterValueKHR);
     X(vkMapMemory);
@@ -505,6 +509,32 @@ void ImageView::SetObjectNameEXT(const char* name) const {
     SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
 }
 
+int DeviceMemory::GetMemoryFdKHR() const {
+    const VkMemoryGetFdInfoKHR get_fd_info{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
+        .pNext = nullptr,
+        .memory = handle,
+        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+    };
+    int fd;
+    Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd));
+    return fd;
+}
+
+#ifdef _WIN32
+HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const {
+    const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
+        .pNext = nullptr,
+        .memory = handle,
+        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
+    };
+    HANDLE win32_handle;
+    Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle));
+    return win32_handle;
+}
+#endif
+
 void DeviceMemory::SetObjectNameEXT(const char* name) const {
     SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
 }
@@ -756,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
     return DeviceMemory(memory, handle, *dld);
 }
 
-VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept {
-    VkMemoryRequirements requirements;
-    dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements);
-    return requirements;
+VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer,
+                                                         void* pnext) const noexcept {
+    const VkBufferMemoryRequirementsInfo2 info{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
+        .pNext = nullptr,
+        .buffer = buffer,
+    };
+    VkMemoryRequirements2 requirements{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+        .pNext = pnext,
+        .memoryRequirements{},
+    };
+    dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements);
+    return requirements.memoryRequirements;
 }
 
 VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 9689de0cb..3e36d356a 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -15,8 +15,19 @@
 #include <vector>
 
 #define VK_NO_PROTOTYPES
+#ifdef _WIN32
+#define VK_USE_PLATFORM_WIN32_KHR
+#endif
 #include <vulkan/vulkan.h>
 
+// Sanitize macros
+#ifdef CreateEvent
+#undef CreateEvent
+#endif
+#ifdef CreateSemaphore
+#undef CreateSemaphore
+#endif
+
 #include "common/common_types.h"
 
 #ifdef _MSC_VER
@@ -174,7 +185,7 @@ struct InstanceDispatch {
 };
 
 /// Table holding Vulkan device function pointers.
-struct DeviceDispatch : public InstanceDispatch {
+struct DeviceDispatch : InstanceDispatch {
     PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{};
     PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{};
     PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{};
@@ -272,11 +283,15 @@ struct DeviceDispatch : public InstanceDispatch {
     PFN_vkFreeCommandBuffers vkFreeCommandBuffers{};
     PFN_vkFreeDescriptorSets vkFreeDescriptorSets{};
     PFN_vkFreeMemory vkFreeMemory{};
-    PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{};
+    PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{};
     PFN_vkGetDeviceQueue vkGetDeviceQueue{};
     PFN_vkGetEventStatus vkGetEventStatus{};
     PFN_vkGetFenceStatus vkGetFenceStatus{};
     PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
+    PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
+#ifdef _WIN32
+    PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
+#endif
     PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
     PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
     PFN_vkMapMemory vkMapMemory{};
@@ -344,6 +359,9 @@ public:
     /// Construct an empty handle.
     Handle() = default;
 
+    /// Construct an empty handle from nullptr.
+    Handle(std::nullptr_t) {}
+
     /// Copying Vulkan objects is not supported and will never be.
     Handle(const Handle&) = delete;
     Handle& operator=(const Handle&) = delete;
@@ -659,6 +677,12 @@ class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
     using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
 
 public:
+    int GetMemoryFdKHR() const;
+
+#ifdef _WIN32
+    HANDLE GetMemoryWin32HandleKHR() const;
+#endif
+
     /// Set object name.
     void SetObjectNameEXT(const char* name) const;
 
@@ -847,7 +871,8 @@ public:
 
     DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
 
-    VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept;
+    VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer,
+                                                     void* pnext = nullptr) const noexcept;
 
     VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
 
@@ -1033,6 +1058,12 @@ public:
 
     void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
                          VkDependencyFlags dependency_flags,
+                         const VkMemoryBarrier& memory_barrier) const noexcept {
+        PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {});
+    }
+
+    void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
+                         VkDependencyFlags dependency_flags,
                          const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
         PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {});
     }
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index fb9967c8f..b025ced1c 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -151,6 +151,7 @@ add_executable(yuzu
     util/util.h
     compatdb.cpp
     compatdb.h
+    yuzu.qrc
     yuzu.rc
 )
 
diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp
index c680fd2c2..b92cd6886 100644
--- a/src/yuzu/applets/controller.cpp
+++ b/src/yuzu/applets/controller.cpp
@@ -67,6 +67,8 @@ bool IsControllerCompatible(Settings::ControllerType controller_type,
         return parameters.allow_right_joycon;
     case Settings::ControllerType::Handheld:
         return parameters.enable_single_mode && parameters.allow_handheld;
+    case Settings::ControllerType::GameCube:
+        return parameters.allow_gamecube_controller;
     default:
         return false;
     }
@@ -370,7 +372,7 @@ void QtControllerSelectorDialog::SetSupportedControllers() {
             QStringLiteral("image: url(:/controller/applet_joycon_right%0_disabled); ").arg(theme));
     }
 
-    if (parameters.allow_pro_controller) {
+    if (parameters.allow_pro_controller || parameters.allow_gamecube_controller) {
         ui->controllerSupported5->setStyleSheet(
             QStringLiteral("image: url(:/controller/applet_pro_controller%0); ").arg(theme));
     } else {
@@ -420,6 +422,13 @@ void QtControllerSelectorDialog::SetEmulatedControllers(std::size_t player_index
                            Settings::ControllerType::Handheld);
         emulated_controllers[player_index]->addItem(tr("Handheld"));
     }
+
+    // Only offer the GameCube controller when the applet parameters allow it.
+    if (parameters.allow_gamecube_controller) {
+        pairs.emplace_back(emulated_controllers[player_index]->count(),
+                           Settings::ControllerType::GameCube);
+        emulated_controllers[player_index]->addItem(tr("GameCube Controller"));
+    }
 }
 
 Settings::ControllerType QtControllerSelectorDialog::GetControllerTypeFromIndex(
@@ -461,6 +470,7 @@ void QtControllerSelectorDialog::UpdateControllerIcon(std::size_t player_index)
         switch (GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(),
                                            player_index)) {
         case Settings::ControllerType::ProController:
+        case Settings::ControllerType::GameCube:
             return QStringLiteral("image: url(:/controller/applet_pro_controller%0); ");
         case Settings::ControllerType::DualJoyconDetached:
             return QStringLiteral("image: url(:/controller/applet_dual_joycon%0); ");
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index ffdf34a4a..1c61d419d 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -64,7 +64,7 @@ void EmuThread::run() {
 
     emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
 
-    system.Renderer().Rasterizer().LoadDiskResources(
+    system.Renderer().ReadRasterizer()->LoadDiskResources(
         system.CurrentProcess()->GetTitleID(), stop_run,
         [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
             emit LoadProgress(stage, value, total);
@@ -405,12 +405,17 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
     if (event->source() == Qt::MouseEventSynthesizedBySystem) {
         return;
     }
-
     auto pos = event->pos();
     const auto [x, y] = ScaleTouch(pos);
-    input_subsystem->GetMouse()->MouseMove(x, y);
+    const int center_x = width() / 2;
+    const int center_y = height() / 2;
+    input_subsystem->GetMouse()->MouseMove(x, y, center_x, center_y);
     this->TouchMoved(x, y, 0);
 
+    if (Settings::values.mouse_panning) {
+        QCursor::setPos(mapToGlobal({center_x, center_y}));
+    }
+
     emit MouseActivity();
 }
 
@@ -714,6 +719,15 @@ void GRenderWindow::showEvent(QShowEvent* event) {
 
 bool GRenderWindow::eventFilter(QObject* object, QEvent* event) {
     if (event->type() == QEvent::HoverMove) {
+        if (Settings::values.mouse_panning) {
+            // HoverMove arrives as a QHoverEvent, which is not a QMouseEvent and must not be
+            // cast to one. Forward an equivalent mouse move event to mouseMoveEvent instead.
+            const auto* hover_event = static_cast<QHoverEvent*>(event);
+            QMouseEvent mouse_event(QEvent::MouseMove, hover_event->posF(), Qt::NoButton,
+                                    Qt::NoButton, hover_event->modifiers());
+            mouseMoveEvent(&mouse_event);
+            return false;
+        }
         emit MouseActivity();
     }
     return false;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 8d85a1986..3d6f64300 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -220,7 +220,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
 // This must be in alphabetical order according to action name as it must have the same order as
 // UISetting::values.shortcuts, which is alphabetically ordered.
 // clang-format off
-const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
+const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{
     {QStringLiteral("Capture Screenshot"),       QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}},
     {QStringLiteral("Change Docked Mode"),       QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
     {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
@@ -235,6 +235,7 @@ const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
     {QStringLiteral("Restart Emulation"),        QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},
     {QStringLiteral("Stop Emulation"),           QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},
     {QStringLiteral("Toggle Filter Bar"),        QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
+    {QStringLiteral("Toggle Mouse Panning"),     QStringLiteral("Main Window"), {QStringLiteral("F9"), Qt::ApplicationShortcut}},
     {QStringLiteral("Toggle Speed Limit"),       QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}},
     {QStringLiteral("Toggle Status Bar"),        QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}},
 }};
@@ -507,6 +508,9 @@ void Config::ReadControlValues() {
 
     Settings::values.emulate_analog_keyboard =
         ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool();
+    Settings::values.mouse_panning = ReadSetting(QStringLiteral("mouse_panning"), false).toBool();
+    Settings::values.mouse_panning_sensitivity =
+        ReadSetting(QStringLiteral("mouse_panning_sensitivity"), 1).toFloat();
 
     ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true);
     ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"),
@@ -610,12 +614,6 @@ void Config::ReadDataStorageValues() {
                                 QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)))
                         .toString()
                         .toStdString());
-    FS::GetUserPath(FS::UserPath::CacheDir,
-                    qt_config
-                        ->value(QStringLiteral("cache_directory"),
-                                QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)))
-                        .toString()
-                        .toStdString());
     Settings::values.gamecard_inserted =
         ReadSetting(QStringLiteral("gamecard_inserted"), false).toBool();
     Settings::values.gamecard_current_game =
@@ -778,14 +776,14 @@ void Config::ReadRendererValues() {
     ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100);
     ReadSettingGlobal(Settings::values.use_disk_shader_cache,
                       QStringLiteral("use_disk_shader_cache"), true);
-    ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0);
+    ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1);
     ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation,
                       QStringLiteral("use_asynchronous_gpu_emulation"), true);
     ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
                       true);
     ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
     ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
-                      true);
+                      false);
     ReadSettingGlobal(Settings::values.use_asynchronous_shaders,
                       QStringLiteral("use_asynchronous_shaders"), false);
     ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
@@ -1184,7 +1182,9 @@ void Config::SaveControlValues() {
     WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false);
     WriteSetting(QStringLiteral("emulate_analog_keyboard"),
                  Settings::values.emulate_analog_keyboard, false);
-
+    WriteSetting(QStringLiteral("mouse_panning"), Settings::values.mouse_panning, false);
+    WriteSetting(QStringLiteral("mouse_panning_sensitivity"),
+                 Settings::values.mouse_panning_sensitivity, 1.0f);
     qt_config->endGroup();
 }
 
@@ -1212,9 +1212,6 @@ void Config::SaveDataStorageValues() {
     WriteSetting(QStringLiteral("dump_directory"),
                  QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)),
                  QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)));
-    WriteSetting(QStringLiteral("cache_directory"),
-                 QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)),
-                 QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)));
     WriteSetting(QStringLiteral("gamecard_inserted"), Settings::values.gamecard_inserted, false);
     WriteSetting(QStringLiteral("gamecard_current_game"), Settings::values.gamecard_current_game,
                  false);
@@ -1345,14 +1342,14 @@ void Config::SaveRendererValues() {
                        Settings::values.use_disk_shader_cache, true);
     WriteSettingGlobal(QStringLiteral("gpu_accuracy"),
                        static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)),
-                       Settings::values.gpu_accuracy.UsingGlobal(), 0);
+                       Settings::values.gpu_accuracy.UsingGlobal(), 1);
     WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"),
                        Settings::values.use_asynchronous_gpu_emulation, true);
     WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
                        true);
     WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
     WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
-                       Settings::values.use_assembly_shaders, true);
+                       Settings::values.use_assembly_shaders, false);
     WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"),
                        Settings::values.use_asynchronous_shaders, false);
     WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 8a600e19d..949c4eb13 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,7 +42,7 @@ public:
         default_mouse_buttons;
     static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
     static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
-    static const std::array<UISettings::Shortcut, 16> default_hotkeys;
+    static const std::array<UISettings::Shortcut, 17> default_hotkeys;
 
 private:
     void Initialize(const std::string& config_name);
diff --git a/src/yuzu/configuration/configure_filesystem.cpp b/src/yuzu/configuration/configure_filesystem.cpp
index 7ab4a80f7..bde2d4620 100644
--- a/src/yuzu/configuration/configure_filesystem.cpp
+++ b/src/yuzu/configuration/configure_filesystem.cpp
@@ -26,8 +26,6 @@ ConfigureFilesystem::ConfigureFilesystem(QWidget* parent)
             [this] { SetDirectory(DirectoryTarget::Dump, ui->dump_path_edit); });
     connect(ui->load_path_button, &QToolButton::pressed, this,
             [this] { SetDirectory(DirectoryTarget::Load, ui->load_path_edit); });
-    connect(ui->cache_directory_button, &QToolButton::pressed, this,
-            [this] { SetDirectory(DirectoryTarget::Cache, ui->cache_directory_edit); });
 
     connect(ui->reset_game_list_cache, &QPushButton::pressed, this,
             &ConfigureFilesystem::ResetMetadata);
@@ -50,8 +48,6 @@ void ConfigureFilesystem::setConfiguration() {
         QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::DumpDir)));
     ui->load_path_edit->setText(
         QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::LoadDir)));
-    ui->cache_directory_edit->setText(
-        QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)));
 
     ui->gamecard_inserted->setChecked(Settings::values.gamecard_inserted);
     ui->gamecard_current_game->setChecked(Settings::values.gamecard_current_game);
@@ -72,9 +68,7 @@ void ConfigureFilesystem::applyConfiguration() {
                             ui->dump_path_edit->text().toStdString());
     Common::FS::GetUserPath(Common::FS::UserPath::LoadDir,
                             ui->load_path_edit->text().toStdString());
-    Common::FS::GetUserPath(Common::FS::UserPath::CacheDir,
-                            ui->cache_directory_edit->text().toStdString());
     Settings::values.gamecard_path = ui->gamecard_path_edit->text().toStdString();
 
     Settings::values.gamecard_inserted = ui->gamecard_inserted->isChecked();
     Settings::values.gamecard_current_game = ui->gamecard_current_game->isChecked();
@@ -103,9 +97,6 @@ void ConfigureFilesystem::SetDirectory(DirectoryTarget target, QLineEdit* edit)
     case DirectoryTarget::Load:
         caption = tr("Select Mod Load Directory...");
         break;
-    case DirectoryTarget::Cache:
-        caption = tr("Select Cache Directory...");
-        break;
     }
 
     QString str;
diff --git a/src/yuzu/configuration/configure_filesystem.h b/src/yuzu/configuration/configure_filesystem.h
index a79303760..2147cd405 100644
--- a/src/yuzu/configuration/configure_filesystem.h
+++ b/src/yuzu/configuration/configure_filesystem.h
@@ -32,7 +32,6 @@ private:
         Gamecard,
         Dump,
         Load,
-        Cache,
     };
 
     void SetDirectory(DirectoryTarget target, QLineEdit* edit);
diff --git a/src/yuzu/configuration/configure_filesystem.ui b/src/yuzu/configuration/configure_filesystem.ui
index 84bea0600..62b9abc7a 100644
--- a/src/yuzu/configuration/configure_filesystem.ui
+++ b/src/yuzu/configuration/configure_filesystem.ui
@@ -198,40 +198,7 @@
         <string>Caching</string>
        </property>
        <layout class="QGridLayout" name="gridLayout_5">
-        <item row="0" column="0">
-         <widget class="QLabel" name="label_10">
-          <property name="text">
-           <string>Cache Directory</string>
-          </property>
-         </widget>
-        </item>
-        <item row="0" column="1">
-         <spacer name="horizontalSpacer_3">
-          <property name="orientation">
-           <enum>Qt::Horizontal</enum>
-          </property>
-          <property name="sizeType">
-           <enum>QSizePolicy::Fixed</enum>
-          </property>
-          <property name="sizeHint" stdset="0">
-           <size>
-            <width>40</width>
-            <height>20</height>
-           </size>
-          </property>
-         </spacer>
-        </item>
-        <item row="0" column="2">
-         <widget class="QLineEdit" name="cache_directory_edit"/>
-        </item>
-        <item row="0" column="3">
-         <widget class="QToolButton" name="cache_directory_button">
-          <property name="text">
-           <string>...</string>
-          </property>
-         </widget>
-        </item>
-        <item row="1" column="0" colspan="4">
+        <item row="0" column="0" colspan="2">
          <layout class="QHBoxLayout" name="horizontalLayout_2">
           <item>
            <widget class="QCheckBox" name="cache_game_list">
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index b78a5dff0..9ff32aec4 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -2,6 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+// Include this early to include Vulkan headers how we want to
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
 #include <QColorDialog>
 #include <QComboBox>
 #include <QVulkanInstance>
@@ -11,7 +14,8 @@
 #include "core/core.h"
 #include "core/settings.h"
 #include "ui_configure_graphics.h"
-#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/vulkan_common/vulkan_instance.h"
+#include "video_core/vulkan_common/vulkan_library.h"
 #include "yuzu/configuration/configuration_shared.h"
 #include "yuzu/configuration/configure_graphics.h"
 
@@ -212,11 +216,25 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
     ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn());
 }
 
-void ConfigureGraphics::RetrieveVulkanDevices() {
+void ConfigureGraphics::RetrieveVulkanDevices() try {
+    using namespace Vulkan;
+
+    vk::InstanceDispatch dld;
+    const Common::DynamicLibrary library = OpenLibrary();
+    const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
+    const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
+
     vulkan_devices.clear();
-    for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
+    vulkan_devices.reserve(physical_devices.size());
+    for (const VkPhysicalDevice device : physical_devices) {
+        // Copy the name into an owning string: deviceName belongs to a temporary that is
+        // destroyed at the end of this statement, so keeping a raw pointer would dangle.
+        const std::string name = vk::PhysicalDevice(device, dld).GetProperties().deviceName;
         vulkan_devices.push_back(QString::fromStdString(name));
     }
+
+} catch (const Vulkan::vk::Exception& exception) {
+    LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
 }
 
 Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp
index 4e557bc6f..a1a0eb676 100644
--- a/src/yuzu/configuration/configure_input_advanced.cpp
+++ b/src/yuzu/configuration/configure_input_advanced.cpp
@@ -122,6 +122,9 @@ void ConfigureInputAdvanced::ApplyConfiguration() {
     Settings::values.mouse_enabled = ui->mouse_enabled->isChecked();
     Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked();
     Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked();
+    Settings::values.mouse_panning = ui->mouse_panning->isChecked();
+    Settings::values.mouse_panning_sensitivity =
+        static_cast<float>(ui->mouse_panning_sensitivity->value());
     Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked();
 }
 
@@ -149,6 +152,8 @@ void ConfigureInputAdvanced::LoadConfiguration() {
     ui->mouse_enabled->setChecked(Settings::values.mouse_enabled);
     ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled);
     ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard);
+    ui->mouse_panning->setChecked(Settings::values.mouse_panning);
+    ui->mouse_panning_sensitivity->setValue(Settings::values.mouse_panning_sensitivity);
     ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled);
 
     UpdateUIEnabled();
diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui
index f207e5d3b..173130d8d 100644
--- a/src/yuzu/configuration/configure_input_advanced.ui
+++ b/src/yuzu/configuration/configure_input_advanced.ui
@@ -2546,27 +2546,65 @@
                  </property>
                 </widget>
                </item>
-               <item row="1" column="0">
-                 <widget class="QCheckBox" name="emulate_analog_keyboard">
-                   <property name="minimumSize">
-                     <size>
-                       <width>0</width>
-                       <height>23</height>
-                     </size>
-                   </property>
-                   <property name="text">
-                     <string>Emulate Analog with Keyboard Input</string>
-                   </property>
-                 </widget>
-               </item>
-               <item row="5" column="2">
+                <item row="1" column="0">
+                  <widget class="QCheckBox" name="emulate_analog_keyboard">
+                    <property name="minimumSize">
+                      <size>
+                        <width>0</width>
+                        <height>23</height>
+                      </size>
+                    </property>
+                    <property name="text">
+                      <string>Emulate Analog with Keyboard Input</string>
+                    </property>
+                  </widget>
+                </item>
+                <item row="2" column="0">
+                  <widget class="QCheckBox" name="mouse_panning">
+                    <property name="minimumSize">
+                      <size>
+                        <width>0</width>
+                        <height>23</height>
+                      </size>
+                    </property>
+                    <property name="text">
+                      <string>Enable mouse panning</string>
+                    </property>
+                  </widget>
+                </item>
+                <item row="2" column="2">
+                    <widget class="QDoubleSpinBox" name="mouse_panning_sensitivity">
+                      <property name="toolTip">
+                        <string>Mouse sensitivity</string>
+                      </property>
+                      <property name="alignment">
+                        <set>Qt::AlignCenter</set>
+                      </property>
+                      <property name="decimals">
+                        <number>2</number>
+                      </property>
+                      <property name="minimum">
+                        <double>0.100000000000000</double>
+                      </property>
+                      <property name="maximum">
+                        <double>16.000000000000000</double>
+                      </property>
+                      <property name="singleStep">
+                        <double>0.010000000000000</double>
+                      </property>
+                      <property name="value">
+                        <double>1.000000000000000</double>
+                      </property>
+                    </widget>
+                </item>
+               <item row="6" column="2">
                 <widget class="QPushButton" name="touchscreen_advanced">
                  <property name="text">
                   <string>Advanced</string>
                  </property>
                 </widget>
                </item>
-               <item row="2" column="1">
+               <item row="3" column="1">
                 <spacer name="horizontalSpacer_8">
                  <property name="orientation">
                   <enum>Qt::Horizontal</enum>
@@ -2582,21 +2620,21 @@
                  </property>
                 </spacer>
                </item>
-               <item row="2" column="2">
+               <item row="3" column="2">
                 <widget class="QPushButton" name="mouse_advanced">
                  <property name="text">
                   <string>Advanced</string>
                  </property>
                 </widget>
                </item>
-               <item row="5" column="0">
+               <item row="6" column="0">
                 <widget class="QCheckBox" name="touchscreen_enabled">
                  <property name="text">
                   <string>Touchscreen</string>
                  </property>
                 </widget>
                </item>
-               <item row="2" column="0">
+               <item row="3" column="0">
                 <widget class="QCheckBox" name="mouse_enabled">
                  <property name="minimumSize">
                   <size>
@@ -2609,28 +2647,28 @@
                  </property>
                 </widget>
                </item>
-               <item row="7" column="0">
+               <item row="8" column="0">
                 <widget class="QLabel" name="motion_touch">
                  <property name="text">
                   <string>Motion / Touch</string>
                  </property>
                 </widget>
                </item>
-               <item row="7" column="2">
+               <item row="8" column="2">
                 <widget class="QPushButton" name="buttonMotionTouch">
                  <property name="text">
                   <string>Configure</string>
                  </property>
                 </widget>
                </item>
-               <item row="6" column="0">
+               <item row="7" column="0">
                 <widget class="QCheckBox" name="debug_enabled">
                  <property name="text">
                   <string>Debug Controller</string>
                  </property>
                 </widget>
                </item>
-               <item row="6" column="2">
+               <item row="7" column="2">
                 <widget class="QPushButton" name="debug_configure">
                  <property name="text">
                   <string>Configure</string>
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index c9d19c948..21d0d3449 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -467,10 +467,14 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
 
     UpdateControllerIcon();
     UpdateControllerAvailableButtons();
+    UpdateControllerEnabledButtons();
+    UpdateControllerButtonNames();
     UpdateMotionButtons();
     connect(ui->comboControllerType, qOverload<int>(&QComboBox::currentIndexChanged), [this](int) {
         UpdateControllerIcon();
         UpdateControllerAvailableButtons();
+        UpdateControllerEnabledButtons();
+        UpdateControllerButtonNames();
         UpdateMotionButtons();
     });
 
@@ -558,9 +562,6 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
             &ConfigureInputPlayer::SaveProfile);
 
     LoadConfiguration();
-
-    // TODO(wwylele): enable this when we actually emulate it
-    ui->buttonHome->setEnabled(false);
     ui->controllerFrame->SetPlayerInput(player_index, buttons_param, analogs_param);
     ui->controllerFrame->SetConnectedStatus(ui->groupConnectedController->isChecked());
 }
@@ -924,6 +925,12 @@ void ConfigureInputPlayer::SetConnectableControllers() {
                                                      Settings::ControllerType::Handheld);
             ui->comboControllerType->addItem(tr("Handheld"));
         }
+
+        if (enable_all || npad_style_set.gamecube == 1) {
+            index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
+                                                     Settings::ControllerType::GameCube);
+            ui->comboControllerType->addItem(tr("GameCube Controller"));
+        }
     };
 
     Core::System& system{Core::System::GetInstance()};
@@ -1014,7 +1021,7 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
 
     // List of all the widgets that will be hidden by any of the following layouts that need
     // "unhidden" after the controller type changes
-    const std::array<QWidget*, 9> layout_show = {
+    const std::array<QWidget*, 11> layout_show = {
         ui->buttonShoulderButtonsSLSR,
         ui->horizontalSpacerShoulderButtonsWidget,
         ui->horizontalSpacerShoulderButtonsWidget2,
@@ -1024,6 +1031,8 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
         ui->buttonShoulderButtonsRight,
         ui->buttonMiscButtonsPlusHome,
         ui->bottomRight,
+        ui->buttonMiscButtonsMinusGroup,
+        ui->buttonMiscButtonsScreenshotGroup,
     };
 
     for (auto* widget : layout_show) {
@@ -1056,6 +1065,14 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
             ui->bottomLeft,
         };
         break;
+    case Settings::ControllerType::GameCube:
+        layout_hidden = {
+            ui->buttonShoulderButtonsSLSR,
+            ui->horizontalSpacerShoulderButtonsWidget2,
+            ui->buttonMiscButtonsMinusGroup,
+            ui->buttonMiscButtonsScreenshotGroup,
+        };
+        break;
     }
 
     for (auto* widget : layout_hidden) {
@@ -1063,6 +1080,52 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
     }
 }
 
+void ConfigureInputPlayer::UpdateControllerEnabledButtons() {
+    auto layout = GetControllerTypeFromIndex(ui->comboControllerType->currentIndex());
+    if (debug) {
+        layout = Settings::ControllerType::ProController;  // debug: force Pro Controller layout
+    }
+
+    // Every widget that any of the layouts below may disable. All of them are re-enabled first
+    // so that a previously selected controller type does not leave stale disabled widgets.
+    const std::array<QWidget*, 4> layout_enable = {
+        ui->buttonHome,
+        ui->buttonLStickPressedGroup,
+        ui->groupRStickPressed,
+        ui->buttonShoulderButtonsButtonLGroup,
+    };
+
+    for (auto* widget : layout_enable) {
+        widget->setEnabled(true);
+    }
+
+    std::vector<QWidget*> layout_disable;  // widgets to disable for the selected layout
+    switch (layout) {
+    case Settings::ControllerType::ProController:
+    case Settings::ControllerType::DualJoyconDetached:
+    case Settings::ControllerType::Handheld:
+    case Settings::ControllerType::LeftJoycon:
+    case Settings::ControllerType::RightJoycon:
+        // TODO(wwylele): enable the Home button when we actually emulate it
+        layout_disable = {
+            ui->buttonHome,
+        };
+        break;
+    case Settings::ControllerType::GameCube:  // also disables stick-press and L button widgets
+        layout_disable = {
+            ui->buttonHome,
+            ui->buttonLStickPressedGroup,
+            ui->groupRStickPressed,
+            ui->buttonShoulderButtonsButtonLGroup,
+        };
+        break;
+    }
+
+    for (auto* widget : layout_disable) {
+        widget->setEnabled(false);
+    }
+}
+
 void ConfigureInputPlayer::UpdateMotionButtons() {
     if (debug) {
         // Motion isn't used with the debug controller, hide both groupboxes.
@@ -1085,6 +1148,11 @@ void ConfigureInputPlayer::UpdateMotionButtons() {
         ui->buttonMotionLeftGroup->hide();
         ui->buttonMotionRightGroup->show();
         break;
+    case Settings::ControllerType::GameCube:
+        // Hide both "Motion 1/2".
+        ui->buttonMotionLeftGroup->hide();
+        ui->buttonMotionRightGroup->hide();
+        break;
     case Settings::ControllerType::DualJoyconDetached:
     default:
         // Show both "Motion 1/2".
@@ -1094,6 +1162,36 @@ void ConfigureInputPlayer::UpdateMotionButtons() {
     }
 }
 
+void ConfigureInputPlayer::UpdateControllerButtonNames() {
+    auto layout = GetControllerTypeFromIndex(ui->comboControllerType->currentIndex());
+    if (debug) {
+        layout = Settings::ControllerType::ProController;  // debug: force Pro Controller layout
+    }
+
+    switch (layout) {  // each case sets the same six group titles
+    case Settings::ControllerType::ProController:
+    case Settings::ControllerType::DualJoyconDetached:
+    case Settings::ControllerType::Handheld:
+    case Settings::ControllerType::LeftJoycon:
+    case Settings::ControllerType::RightJoycon:  // these types share one set of titles
+        ui->buttonMiscButtonsPlusGroup->setTitle(tr("Plus"));
+        ui->buttonShoulderButtonsButtonZLGroup->setTitle(tr("ZL"));
+        ui->buttonShoulderButtonsZRGroup->setTitle(tr("ZR"));
+        ui->buttonShoulderButtonsRGroup->setTitle(tr("R"));
+        ui->LStick->setTitle(tr("Left Stick"));
+        ui->RStick->setTitle(tr("Right Stick"));
+        break;
+    case Settings::ControllerType::GameCube:  // retitle the same groups for this type
+        ui->buttonMiscButtonsPlusGroup->setTitle(tr("Start / Pause"));
+        ui->buttonShoulderButtonsButtonZLGroup->setTitle(tr("L"));
+        ui->buttonShoulderButtonsZRGroup->setTitle(tr("R"));
+        ui->buttonShoulderButtonsRGroup->setTitle(tr("Z"));
+        ui->LStick->setTitle(tr("Control Stick"));
+        ui->RStick->setTitle(tr("C-Stick"));
+        break;
+    }
+}
+
 void ConfigureInputPlayer::UpdateMappingWithDefaults() {
     if (ui->comboDevices->currentIndex() == 0) {
         return;
diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h
index da2b89136..efe953fbc 100644
--- a/src/yuzu/configuration/configure_input_player.h
+++ b/src/yuzu/configuration/configure_input_player.h
@@ -143,9 +143,15 @@ private:
     /// Hides and disables controller settings based on the current controller type.
     void UpdateControllerAvailableButtons();
 
+    /// Enables or disables controller settings based on the current controller type.
+    void UpdateControllerEnabledButtons();
+
     /// Shows or hides motion groupboxes based on the current controller type.
     void UpdateMotionButtons();
 
+    /// Alters the button names based on the current controller type.
+    void UpdateControllerButtonNames();
+
     /// Gets the default controller mapping for this device and auto configures the input to match.
     void UpdateMappingWithDefaults();
 
diff --git a/src/yuzu/configuration/configure_input_player_widget.cpp b/src/yuzu/configuration/configure_input_player_widget.cpp
index e77ccc057..61ba91cef 100644
--- a/src/yuzu/configuration/configure_input_player_widget.cpp
+++ b/src/yuzu/configuration/configure_input_player_widget.cpp
@@ -37,7 +37,8 @@ void PlayerControlPreview::SetPlayerInput(std::size_t index, const ButtonParam&
                    Input::CreateDevice<Input::AnalogDevice>);
     UpdateColors();
 }
-void PlayerControlPreview::SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_,
+void PlayerControlPreview::SetPlayerInputRaw(std::size_t index,
+                                             const Settings::ButtonsRaw& buttons_,
                                              Settings::AnalogsRaw analogs_) {
     player_index = index;
     std::transform(buttons_.begin() + Settings::NativeButton::BUTTON_HID_BEGIN,
@@ -226,6 +227,9 @@ void PlayerControlPreview::paintEvent(QPaintEvent* event) {
     case Settings::ControllerType::RightJoycon:
         DrawRightController(p, center);
         break;
+    case Settings::ControllerType::GameCube:
+        DrawGCController(p, center);
+        break;
     case Settings::ControllerType::ProController:
     default:
         DrawProController(p, center);
@@ -517,14 +521,15 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center)
     {
         // Draw joysticks
         using namespace Settings::NativeAnalog;
-        DrawJoystick(p, center + QPointF(-65, -65) + (axis_values[LStick].value * 7), 1.62f,
-                     button_values[Settings::NativeButton::LStick]);
-        DrawJoystick(p, center + QPointF(65, 12) + (axis_values[RStick].value * 7), 1.62f,
-                     button_values[Settings::NativeButton::RStick]);
-        DrawRawJoystick(p, center + QPointF(-180, 90), axis_values[LStick].raw_value,
-                        axis_values[LStick].properties);
-        DrawRawJoystick(p, center + QPointF(180, 90), axis_values[RStick].raw_value,
-                        axis_values[RStick].properties);
+        const auto& l_stick = axis_values[LStick];
+        const auto l_button = button_values[Settings::NativeButton::LStick];
+        const auto& r_stick = axis_values[RStick];
+        const auto r_button = button_values[Settings::NativeButton::RStick];
+
+        DrawJoystick(p, center + QPointF(-65, -65) + (l_stick.value * 7), 1.62f, l_button);
+        DrawJoystick(p, center + QPointF(65, 12) + (r_stick.value * 7), 1.62f, r_button);
+        DrawRawJoystick(p, center + QPointF(-180, 90), l_stick.raw_value, l_stick.properties);
+        DrawRawJoystick(p, center + QPointF(180, 90), r_stick.raw_value, r_stick.properties);
     }
 
     using namespace Settings::NativeButton;
@@ -603,14 +608,15 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen
     {
         // Draw joysticks
         using namespace Settings::NativeAnalog;
-        DrawJoystick(p, center + QPointF(-171, -41) + (axis_values[LStick].value * 4), 1.0f,
-                     button_values[Settings::NativeButton::LStick]);
-        DrawJoystick(p, center + QPointF(171, 8) + (axis_values[RStick].value * 4), 1.0f,
-                     button_values[Settings::NativeButton::RStick]);
-        DrawRawJoystick(p, center + QPointF(-50, 0), axis_values[LStick].raw_value,
-                        axis_values[LStick].properties);
-        DrawRawJoystick(p, center + QPointF(50, 0), axis_values[RStick].raw_value,
-                        axis_values[RStick].properties);
+        const auto& l_stick = axis_values[LStick];
+        const auto l_button = button_values[Settings::NativeButton::LStick];
+        const auto& r_stick = axis_values[RStick];
+        const auto r_button = button_values[Settings::NativeButton::RStick];
+
+        DrawJoystick(p, center + QPointF(-171, -41) + (l_stick.value * 4), 1.0f, l_button);
+        DrawJoystick(p, center + QPointF(171, 8) + (r_stick.value * 4), 1.0f, r_button);
+        DrawRawJoystick(p, center + QPointF(-50, 0), l_stick.raw_value, l_stick.properties);
+        DrawRawJoystick(p, center + QPointF(50, 0), r_stick.raw_value, r_stick.properties);
     }
 
     using namespace Settings::NativeButton;
@@ -1002,12 +1008,6 @@ constexpr std::array<float, 3 * 2> up_arrow_symbol = {
     0.0f, -3.0f, -3.0f, 2.0f, 3.0f, 2.0f,
 };
 
-constexpr std::array<float, 13 * 2> up_arrow = {
-    9.4f,   -9.8f,  9.4f,   -10.2f, 8.9f,   -29.8f, 8.5f,   -30.0f, 8.1f,
-    -30.1f, 7.7f,   -30.1f, -8.6f,  -30.0f, -9.0f,  -29.8f, -9.3f,  -29.5f,
-    -9.5f,  -29.1f, -9.5f,  -28.7f, -9.1f,  -9.1f,  -8.8f,  -8.8f,
-};
-
 constexpr std::array<float, 64 * 2> trigger_button = {
     5.5f,   -12.6f, 5.8f,   -12.6f, 6.7f,   -12.5f, 8.1f,   -12.3f, 8.6f,   -12.2f, 9.2f,   -12.0f,
     9.5f,   -11.9f, 9.9f,   -11.8f, 10.6f,  -11.5f, 11.0f,  -11.3f, 11.2f,  -11.2f, 11.4f,  -11.1f,
@@ -1457,15 +1457,18 @@ void PlayerControlPreview::DrawProBody(QPainter& p, const QPointF center) {
     constexpr int radius1 = 32;
 
     for (std::size_t point = 0; point < pro_left_handle.size() / 2; ++point) {
-        qleft_handle[point] =
-            center + QPointF(pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]);
-        qright_handle[point] =
-            center + QPointF(-pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]);
+        const float left_x = pro_left_handle[point * 2 + 0];
+        const float left_y = pro_left_handle[point * 2 + 1];
+
+        qleft_handle[point] = center + QPointF(left_x, left_y);
+        qright_handle[point] = center + QPointF(-left_x, left_y);
     }
     for (std::size_t point = 0; point < pro_body.size() / 2; ++point) {
-        qbody[point] = center + QPointF(pro_body[point * 2], pro_body[point * 2 + 1]);
-        qbody[pro_body.size() - 1 - point] =
-            center + QPointF(-pro_body[point * 2], pro_body[point * 2 + 1]);
+        const float body_x = pro_body[point * 2 + 0];
+        const float body_y = pro_body[point * 2 + 1];
+
+        qbody[point] = center + QPointF(body_x, body_y);
+        qbody[pro_body.size() - 1 - point] = center + QPointF(-body_x, body_y);
     }
 
     // Draw left handle body
@@ -1496,21 +1499,25 @@ void PlayerControlPreview::DrawGCBody(QPainter& p, const QPointF center) {
     constexpr float angle = 2 * 3.1415f / 8;
 
     for (std::size_t point = 0; point < gc_left_body.size() / 2; ++point) {
-        qleft_handle[point] =
-            center + QPointF(gc_left_body[point * 2], gc_left_body[point * 2 + 1]);
-        qright_handle[point] =
-            center + QPointF(-gc_left_body[point * 2], gc_left_body[point * 2 + 1]);
+        const float body_x = gc_left_body[point * 2 + 0];
+        const float body_y = gc_left_body[point * 2 + 1];
+
+        qleft_handle[point] = center + QPointF(body_x, body_y);
+        qright_handle[point] = center + QPointF(-body_x, body_y);
     }
     for (std::size_t point = 0; point < gc_body.size() / 2; ++point) {
-        qbody[point] = center + QPointF(gc_body[point * 2], gc_body[point * 2 + 1]);
-        qbody[gc_body.size() - 1 - point] =
-            center + QPointF(-gc_body[point * 2], gc_body[point * 2 + 1]);
+        const float body_x = gc_body[point * 2 + 0];
+        const float body_y = gc_body[point * 2 + 1];
+
+        qbody[point] = center + QPointF(body_x, body_y);
+        qbody[gc_body.size() - 1 - point] = center + QPointF(-body_x, body_y);
     }
     for (std::size_t point = 0; point < 8; ++point) {
-        left_hex[point] =
-            center + QPointF(34 * std::cos(point * angle) - 111, 34 * std::sin(point * angle) - 44);
-        right_hex[point] =
-            center + QPointF(26 * std::cos(point * angle) + 61, 26 * std::sin(point * angle) + 37);
+        const float point_cos = std::cos(point * angle);
+        const float point_sin = std::sin(point * angle);
+
+        left_hex[point] = center + QPointF(34 * point_cos - 111, 34 * point_sin - 44);
+        right_hex[point] = center + QPointF(26 * point_cos + 61, 26 * point_sin + 37);
     }
 
     // Draw body
@@ -1631,32 +1638,36 @@ void PlayerControlPreview::DrawDualBody(QPainter& p, const QPointF center) {
     constexpr float offset = 209.3f;
 
     for (std::size_t point = 0; point < left_joycon_body.size() / 2; ++point) {
-        left_joycon[point] = center + QPointF(left_joycon_body[point * 2] * size + offset,
-                                              left_joycon_body[point * 2 + 1] * size - 1);
-        right_joycon[point] = center + QPointF(-left_joycon_body[point * 2] * size - offset,
-                                               left_joycon_body[point * 2 + 1] * size - 1);
+        const float body_x = left_joycon_body[point * 2 + 0];
+        const float body_y = left_joycon_body[point * 2 + 1];
+
+        left_joycon[point] = center + QPointF(body_x * size + offset, body_y * size - 1);
+        right_joycon[point] = center + QPointF(-body_x * size - offset, body_y * size - 1);
     }
     for (std::size_t point = 0; point < left_joycon_slider.size() / 2; ++point) {
-        qleft_joycon_slider[point] =
-            center + QPointF(left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]);
-        qright_joycon_slider[point] =
-            center + QPointF(-left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]);
+        const float slider_x = left_joycon_slider[point * 2 + 0];
+        const float slider_y = left_joycon_slider[point * 2 + 1];
+
+        qleft_joycon_slider[point] = center + QPointF(slider_x, slider_y);
+        qright_joycon_slider[point] = center + QPointF(-slider_x, slider_y);
     }
     for (std::size_t point = 0; point < left_joycon_topview.size() / 2; ++point) {
+        const float top_view_x = left_joycon_topview[point * 2 + 0];
+        const float top_view_y = left_joycon_topview[point * 2 + 1];
+
         qleft_joycon_topview[point] =
-            center + QPointF(left_joycon_topview[point * 2] * size2 - 52,
-                             left_joycon_topview[point * 2 + 1] * size2 - 52);
+            center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52);
         qright_joycon_topview[point] =
-            center + QPointF(-left_joycon_topview[point * 2] * size2 + 52,
-                             left_joycon_topview[point * 2 + 1] * size2 - 52);
+            center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52);
     }
     for (std::size_t point = 0; point < left_joycon_slider_topview.size() / 2; ++point) {
+        const float top_view_x = left_joycon_slider_topview[point * 2 + 0];
+        const float top_view_y = left_joycon_slider_topview[point * 2 + 1];
+
         qleft_joycon_slider_topview[point] =
-            center + QPointF(left_joycon_slider_topview[point * 2] * size2 - 52,
-                             left_joycon_slider_topview[point * 2 + 1] * size2 - 52);
+            center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52);
         qright_joycon_slider_topview[point] =
-            center + QPointF(-left_joycon_slider_topview[point * 2] * size2 + 52,
-                             left_joycon_slider_topview[point * 2 + 1] * size2 - 52);
+            center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52);
     }
 
     // right joycon body
@@ -1905,18 +1916,19 @@ void PlayerControlPreview::DrawProTriggers(QPainter& p, const QPointF center, bo
     std::array<QPointF, pro_body_top.size()> qbody_top;
 
     for (std::size_t point = 0; point < pro_left_trigger.size() / 2; ++point) {
-        qleft_trigger[point] =
-            center + QPointF(pro_left_trigger[point * 2],
-                             pro_left_trigger[point * 2 + 1] + (left_pressed ? 2 : 0));
-        qright_trigger[point] =
-            center + QPointF(-pro_left_trigger[point * 2],
-                             pro_left_trigger[point * 2 + 1] + (right_pressed ? 2 : 0));
+        const float trigger_x = pro_left_trigger[point * 2 + 0];
+        const float trigger_y = pro_left_trigger[point * 2 + 1];
+
+        qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 2 : 0));
+        qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 2 : 0));
     }
 
     for (std::size_t point = 0; point < pro_body_top.size() / 2; ++point) {
-        qbody_top[pro_body_top.size() - 1 - point] =
-            center + QPointF(-pro_body_top[point * 2], pro_body_top[point * 2 + 1]);
-        qbody_top[point] = center + QPointF(pro_body_top[point * 2], pro_body_top[point * 2 + 1]);
+        const float top_x = pro_body_top[point * 2 + 0];
+        const float top_y = pro_body_top[point * 2 + 1];
+
+        qbody_top[pro_body_top.size() - 1 - point] = center + QPointF(-top_x, top_y);
+        qbody_top[point] = center + QPointF(top_x, top_y);
     }
 
     // Pro body detail
@@ -1939,12 +1951,11 @@ void PlayerControlPreview::DrawGCTriggers(QPainter& p, const QPointF center, boo
     std::array<QPointF, left_gc_trigger.size() / 2> qright_trigger;
 
     for (std::size_t point = 0; point < left_gc_trigger.size() / 2; ++point) {
-        qleft_trigger[point] =
-            center + QPointF(left_gc_trigger[point * 2],
-                             left_gc_trigger[point * 2 + 1] + (left_pressed ? 10 : 0));
-        qright_trigger[point] =
-            center + QPointF(-left_gc_trigger[point * 2],
-                             left_gc_trigger[point * 2 + 1] + (right_pressed ? 10 : 0));
+        const float trigger_x = left_gc_trigger[point * 2 + 0];
+        const float trigger_y = left_gc_trigger[point * 2 + 1];
+
+        qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 10 : 0));
+        qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 10 : 0));
     }
 
     // Left trigger
@@ -1973,12 +1984,13 @@ void PlayerControlPreview::DrawHandheldTriggers(QPainter& p, const QPointF cente
     std::array<QPointF, left_joycon_trigger.size() / 2> qright_trigger;
 
     for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
+        const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
+        const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
+
         qleft_trigger[point] =
-            center + QPointF(left_joycon_trigger[point * 2],
-                             left_joycon_trigger[point * 2 + 1] + (left_pressed ? 0.5f : 0));
+            center + QPointF(left_trigger_x, left_trigger_y + (left_pressed ? 0.5f : 0));
         qright_trigger[point] =
-            center + QPointF(-left_joycon_trigger[point * 2],
-                             left_joycon_trigger[point * 2 + 1] + (right_pressed ? 0.5f : 0));
+            center + QPointF(-left_trigger_x, left_trigger_y + (right_pressed ? 0.5f : 0));
     }
 
     // Left trigger
@@ -1998,12 +2010,14 @@ void PlayerControlPreview::DrawDualTriggers(QPainter& p, const QPointF center, b
     constexpr float size = 1.62f;
     constexpr float offset = 210.6f;
     for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
-        qleft_trigger[point] =
-            center + QPointF(left_joycon_trigger[point * 2] * size + offset,
-                             left_joycon_trigger[point * 2 + 1] * size + (left_pressed ? 0.5f : 0));
-        qright_trigger[point] = center + QPointF(-left_joycon_trigger[point * 2] * size - offset,
-                                                 left_joycon_trigger[point * 2 + 1] * size +
-                                                     (right_pressed ? 0.5f : 0));
+        const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
+        const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
+
+        qleft_trigger[point] = center + QPointF(left_trigger_x * size + offset,
+                                                left_trigger_y * size + (left_pressed ? 0.5f : 0));
+        qright_trigger[point] =
+            center + QPointF(-left_trigger_x * size - offset,
+                             left_trigger_y * size + (right_pressed ? 0.5f : 0));
     }
 
     // Left trigger
@@ -2023,13 +2037,16 @@ void PlayerControlPreview::DrawDualTriggersTopView(QPainter& p, const QPointF ce
     constexpr float size = 0.9f;
 
     for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
-        qleft_trigger[point] = center + QPointF(left_joystick_L_topview[point * 2] * size - 50,
-                                                left_joystick_L_topview[point * 2 + 1] * size - 52);
+        const float top_view_x = left_joystick_L_topview[point * 2 + 0];
+        const float top_view_y = left_joystick_L_topview[point * 2 + 1];
+
+        qleft_trigger[point] = center + QPointF(top_view_x * size - 50, top_view_y * size - 52);
     }
     for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
-        qright_trigger[point] =
-            center + QPointF(-left_joystick_L_topview[point * 2] * size + 50,
-                             left_joystick_L_topview[point * 2 + 1] * size - 52);
+        const float top_view_x = left_joystick_L_topview[point * 2 + 0];
+        const float top_view_y = left_joystick_L_topview[point * 2 + 1];
+
+        qright_trigger[point] = center + QPointF(-top_view_x * size + 50, top_view_y * size - 52);
     }
 
     p.setPen(colors.outline);
@@ -2323,7 +2340,7 @@ void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, boo
 }
 
 void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value,
-                                           const Input::AnalogProperties properties) {
+                                           const Input::AnalogProperties& properties) {
     constexpr float size = 45.0f;
     const float range = size * properties.range;
     const float deadzone = size * properties.deadzone;
@@ -2446,17 +2463,16 @@ void PlayerControlPreview::DrawArrowButtonOutline(QPainter& p, const QPointF cen
     std::array<QPointF, (arrow_points - 1) * 4> arrow_button_outline;
 
     for (std::size_t point = 0; point < arrow_points - 1; ++point) {
-        arrow_button_outline[point] = center + QPointF(up_arrow_button[point * 2] * size,
-                                                       up_arrow_button[point * 2 + 1] * size);
+        const float up_arrow_x = up_arrow_button[point * 2 + 0];
+        const float up_arrow_y = up_arrow_button[point * 2 + 1];
+
+        arrow_button_outline[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
         arrow_button_outline[(arrow_points - 1) * 2 - point - 1] =
-            center +
-            QPointF(up_arrow_button[point * 2 + 1] * size, up_arrow_button[point * 2] * size);
+            center + QPointF(up_arrow_y * size, up_arrow_x * size);
         arrow_button_outline[(arrow_points - 1) * 2 + point] =
-            center +
-            QPointF(-up_arrow_button[point * 2] * size, -up_arrow_button[point * 2 + 1] * size);
+            center + QPointF(-up_arrow_x * size, -up_arrow_y * size);
         arrow_button_outline[(arrow_points - 1) * 4 - point - 1] =
-            center +
-            QPointF(-up_arrow_button[point * 2 + 1] * size, -up_arrow_button[point * 2] * size);
+            center + QPointF(-up_arrow_y * size, -up_arrow_x * size);
     }
     // Draw arrow button outline
     p.setPen(colors.outline);
@@ -2470,22 +2486,21 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
     QPoint offset;
 
     for (std::size_t point = 0; point < up_arrow_button.size() / 2; ++point) {
+        const float up_arrow_x = up_arrow_button[point * 2 + 0];
+        const float up_arrow_y = up_arrow_button[point * 2 + 1];
+
         switch (direction) {
         case Direction::Up:
-            arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size,
-                                                   up_arrow_button[point * 2 + 1] * size);
+            arrow_button[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
             break;
         case Direction::Left:
-            arrow_button[point] = center + QPointF(up_arrow_button[point * 2 + 1] * size,
-                                                   up_arrow_button[point * 2] * size);
+            arrow_button[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
             break;
         case Direction::Right:
-            arrow_button[point] = center + QPointF(-up_arrow_button[point * 2 + 1] * size,
-                                                   up_arrow_button[point * 2] * size);
+            arrow_button[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
             break;
         case Direction::Down:
-            arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size,
-                                                   -up_arrow_button[point * 2 + 1] * size);
+            arrow_button[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
             break;
         case Direction::None:
             break;
@@ -2524,17 +2539,17 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
 void PlayerControlPreview::DrawTriggerButton(QPainter& p, const QPointF center,
                                              const Direction direction, bool pressed) {
     std::array<QPointF, trigger_button.size() / 2> qtrigger_button;
-    QPoint offset;
 
     for (std::size_t point = 0; point < trigger_button.size() / 2; ++point) {
+        const float trigger_button_x = trigger_button[point * 2 + 0];
+        const float trigger_button_y = trigger_button[point * 2 + 1];
+
         switch (direction) {
         case Direction::Left:
-            qtrigger_button[point] =
-                center + QPointF(-trigger_button[point * 2], trigger_button[point * 2 + 1]);
+            qtrigger_button[point] = center + QPointF(-trigger_button_x, trigger_button_y);
             break;
         case Direction::Right:
-            qtrigger_button[point] =
-                center + QPointF(trigger_button[point * 2], trigger_button[point * 2 + 1]);
+            qtrigger_button[point] = center + QPointF(trigger_button_x, trigger_button_y);
             break;
         case Direction::Up:
         case Direction::Down:
@@ -2657,22 +2672,21 @@ void PlayerControlPreview::DrawArrow(QPainter& p, const QPointF center, const Di
     std::array<QPointF, up_arrow_symbol.size() / 2> arrow_symbol;
 
     for (std::size_t point = 0; point < up_arrow_symbol.size() / 2; ++point) {
+        const float up_arrow_x = up_arrow_symbol[point * 2 + 0];
+        const float up_arrow_y = up_arrow_symbol[point * 2 + 1];
+
         switch (direction) {
         case Direction::Up:
-            arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size,
-                                                   up_arrow_symbol[point * 2 + 1] * size);
+            arrow_symbol[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
             break;
         case Direction::Left:
-            arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2 + 1] * size,
-                                                   up_arrow_symbol[point * 2] * size);
+            arrow_symbol[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
             break;
         case Direction::Right:
-            arrow_symbol[point] = center + QPointF(-up_arrow_symbol[point * 2 + 1] * size,
-                                                   up_arrow_symbol[point * 2] * size);
+            arrow_symbol[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
             break;
         case Direction::Down:
-            arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size,
-                                                   -up_arrow_symbol[point * 2 + 1] * size);
+            arrow_symbol[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
             break;
         case Direction::None:
             break;
diff --git a/src/yuzu/configuration/configure_input_player_widget.h b/src/yuzu/configuration/configure_input_player_widget.h
index 676effbfd..91c3343f1 100644
--- a/src/yuzu/configuration/configure_input_player_widget.h
+++ b/src/yuzu/configuration/configure_input_player_widget.h
@@ -25,7 +25,7 @@ public:
 
     void SetPlayerInput(std::size_t index, const ButtonParam& buttons_param,
                         const AnalogParam& analogs_param);
-    void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_,
+    void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw& buttons_,
                            Settings::AnalogsRaw analogs_);
     void SetConnectedStatus(bool checked);
     void SetControllerType(Settings::ControllerType type);
@@ -138,8 +138,8 @@ private:
     // Draw joystick functions
     void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed);
     void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed);
-    void DrawRawJoystick(QPainter& p, QPointF center, const QPointF value,
-                         const Input::AnalogProperties properties);
+    void DrawRawJoystick(QPainter& p, QPointF center, QPointF value,
+                         const Input::AnalogProperties& properties);
     void DrawProJoystick(QPainter& p, QPointF center, QPointF offset, float scalar, bool pressed);
     void DrawGCJoystick(QPainter& p, QPointF center, bool pressed);
 
diff --git a/src/yuzu/debugger/controller.cpp b/src/yuzu/debugger/controller.cpp
index 85724a8f3..2731d948d 100644
--- a/src/yuzu/debugger/controller.cpp
+++ b/src/yuzu/debugger/controller.cpp
@@ -42,7 +42,7 @@ void ControllerDialog::refreshConfiguration() {
 
 QAction* ControllerDialog::toggleViewAction() {
     if (toggle_view_action == nullptr) {
-        toggle_view_action = new QAction(windowTitle(), this);
+        toggle_view_action = new QAction(tr("&Controller P1"), this);
         toggle_view_action->setCheckable(true);
         toggle_view_action->setChecked(isVisible());
         connect(toggle_view_action, &QAction::toggled, this, &ControllerDialog::setVisible);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ef92c25bc..0ba7c07cc 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -850,6 +850,16 @@ void GMainWindow::InitializeHotkeys() {
     connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
             &QShortcut::activated, this,
             [] { Settings::values.audio_muted = !Settings::values.audio_muted; });
+
+    connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this),
+            &QShortcut::activated, this, [&] {
+                Settings::values.mouse_panning = !Settings::values.mouse_panning;
+                if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
+                    mouse_hide_timer.start();
+                    render_window->installEventFilter(render_window);
+                    render_window->setAttribute(Qt::WA_Hover, true);
+                }
+            });
 }
 
 void GMainWindow::SetDefaultUIGeometry() {
@@ -1197,7 +1207,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
     multicore_status_button->setDisabled(true);
     renderer_status_button->setDisabled(true);
 
-    if (UISettings::values.hide_mouse) {
+    if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
         mouse_hide_timer.start();
         render_window->installEventFilter(render_window);
         render_window->setAttribute(Qt::WA_Hover, true);
@@ -2359,7 +2369,7 @@ void GMainWindow::OnConfigure() {
 
     config->Save();
 
-    if (UISettings::values.hide_mouse && emulation_running) {
+    if ((UISettings::values.hide_mouse || Settings::values.mouse_panning) && emulation_running) {
         render_window->installEventFilter(render_window);
         render_window->setAttribute(Qt::WA_Hover, true);
         mouse_hide_timer.start();
@@ -2480,6 +2490,11 @@ void GMainWindow::OnCaptureScreenshot() {
                            .arg(title_id, 16, 16, QLatin1Char{'0'})
                            .arg(date);
 
+    if (!Common::FS::CreateDir(screenshot_path.toStdString())) {
+        OnStartGame();
+        return;
+    }
+
 #ifdef _WIN32
     if (UISettings::values.enable_screenshot_save_as) {
         filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename,
@@ -2600,7 +2615,8 @@ void GMainWindow::UpdateUISettings() {
 }
 
 void GMainWindow::HideMouseCursor() {
-    if (emu_thread == nullptr || UISettings::values.hide_mouse == false) {
+    if (emu_thread == nullptr ||
+        (!UISettings::values.hide_mouse && !Settings::values.mouse_panning)) {
         mouse_hide_timer.stop();
         ShowMouseCursor();
         return;
@@ -2610,13 +2626,16 @@ void GMainWindow::HideMouseCursor() {
 
 void GMainWindow::ShowMouseCursor() {
     render_window->unsetCursor();
-    if (emu_thread != nullptr && UISettings::values.hide_mouse) {
+    if (emu_thread != nullptr &&
+        (UISettings::values.hide_mouse || Settings::values.mouse_panning)) {
         mouse_hide_timer.start();
     }
 }
 
 void GMainWindow::OnMouseActivity() {
-    ShowMouseCursor();
+    if (!Settings::values.mouse_panning) {
+        ShowMouseCursor();
+    }
 }
 
 void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string details) {
@@ -2751,7 +2770,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
                     .arg(errors));
         }
 
-        QProgressDialog prog;
+        QProgressDialog prog(this);
         prog.setRange(0, 0);
         prog.setLabelText(tr("Deriving keys...\nThis may take up to a minute depending \non your "
                              "system's performance."));
@@ -2933,7 +2952,7 @@ void GMainWindow::filterBarSetChecked(bool state) {
 }
 
 void GMainWindow::UpdateUITheme() {
-    const QString default_icons = QStringLiteral(":/icons/default");
+    const QString default_icons = QStringLiteral("default");
     const QString& current_theme = UISettings::values.theme;
     const bool is_default_theme = current_theme == QString::fromUtf8(UISettings::themes[0].second);
     QStringList theme_paths(default_theme_paths);
@@ -2949,7 +2968,6 @@ void GMainWindow::UpdateUITheme() {
             qApp->setStyleSheet({});
             setStyleSheet({});
         }
-        theme_paths.append(default_icons);
         QIcon::setThemeName(default_icons);
     } else {
         const QString theme_uri(QLatin1Char{':'} + current_theme + QStringLiteral("/style.qss"));
@@ -2961,10 +2979,7 @@ void GMainWindow::UpdateUITheme() {
         } else {
             LOG_ERROR(Frontend, "Unable to set style, stylesheet file not found");
         }
-
-        const QString theme_name = QStringLiteral(":/icons/") + current_theme;
-        theme_paths.append({default_icons, theme_name});
-        QIcon::setThemeName(theme_name);
+        QIcon::setThemeName(current_theme);
     }
 
     QIcon::setThemeSearchPaths(theme_paths);
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index e2ad5baf6..048870687 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -14,8 +14,8 @@
    <string>yuzu</string>
   </property>
   <property name="windowIcon">
-   <iconset>
-    <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>
+   <iconset resource="yuzu.qrc">
+    <normaloff>:/img/yuzu.ico</normaloff>:/img/yuzu.ico</iconset>
   </property>
   <property name="tabShape">
    <enum>QTabWidget::Rounded</enum>
@@ -303,6 +303,8 @@
    </property>
   </action>
  </widget>
- <resources/>
+ <resources>
+  <include location="yuzu.qrc"/>
+ </resources>
  <connections/>
 </ui>
diff --git a/src/yuzu/yuzu.qrc b/src/yuzu/yuzu.qrc
new file mode 100644
index 000000000..5733cac98
--- /dev/null
+++ b/src/yuzu/yuzu.qrc
@@ -0,0 +1,5 @@
+<RCC>
+    <qresource prefix="/img">
+        <file alias="yuzu.ico">../../dist/yuzu.ico</file>
+    </qresource>
+</RCC>
diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt
index 0b3f2cb54..8461f8896 100644
--- a/src/yuzu_cmd/CMakeLists.txt
+++ b/src/yuzu_cmd/CMakeLists.txt
@@ -1,5 +1,15 @@
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
 
+function(create_resource file output filename)
+    # Read the input file as a string of lowercase hex digits (two digits per byte)
+    file(READ "${file}" filedata HEX)
+    # Turn every byte into a "0xNN," token so the result is usable as a C array initializer
+    string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata "${filedata}")
+    # Export the include directory to the caller's scope, then write the generated header
+    set(RESOURCES_DIR "${PROJECT_BINARY_DIR}/dist" PARENT_SCOPE)
+    file(WRITE "${PROJECT_BINARY_DIR}/dist/${output}" "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
+endfunction()
+
 add_executable(yuzu-cmd
     config.cpp
     config.h
@@ -24,6 +34,9 @@ if (MSVC)
 endif()
 target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads)
 
+create_resource("../../dist/yuzu.bmp" "yuzu_cmd/yuzu_icon.h" "yuzu_icon")
+target_include_directories(yuzu-cmd PRIVATE ${RESOURCES_DIR})
+
 target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include)
 
 if(UNIX AND NOT APPLE)
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index f76102459..6d8bc5509 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -329,9 +329,6 @@ void Config::ReadValues() {
     FS::GetUserPath(
         FS::UserPath::DumpDir,
         sdl2_config->Get("Data Storage", "dump_directory", FS::GetUserPath(FS::UserPath::DumpDir)));
-    FS::GetUserPath(FS::UserPath::CacheDir,
-                    sdl2_config->Get("Data Storage", "cache_directory",
-                                     FS::GetUserPath(FS::UserPath::CacheDir)));
     Settings::values.gamecard_inserted =
         sdl2_config->GetBoolean("Data Storage", "gamecard_inserted", false);
     Settings::values.gamecard_current_game =
@@ -388,7 +385,7 @@ void Config::ReadValues() {
         static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)));
     Settings::values.use_disk_shader_cache.SetValue(
         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
-    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
+    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1);
     Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
     Settings::values.use_asynchronous_gpu_emulation.SetValue(
         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true));
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 7843d5167..7e391ab89 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -12,6 +12,7 @@
 #include "input_common/mouse/mouse_input.h"
 #include "input_common/sdl/sdl.h"
 #include "yuzu_cmd/emu_window/emu_window_sdl2.h"
+#include "yuzu_cmd/yuzu_icon.h"
 
 EmuWindow_SDL2::EmuWindow_SDL2(InputCommon::InputSubsystem* input_subsystem_)
     : input_subsystem{input_subsystem_} {
@@ -30,7 +31,8 @@ EmuWindow_SDL2::~EmuWindow_SDL2() {
 
 void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) {
     TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0);
-    input_subsystem->GetMouse()->MouseMove(x, y);
+
+    input_subsystem->GetMouse()->MouseMove(x, y, 0, 0);
 }
 
 void EmuWindow_SDL2::OnMouseButton(u32 button, u8 state, s32 x, s32 y) {
@@ -193,6 +195,26 @@ void EmuWindow_SDL2::WaitEvent() {
     }
 }
 
+void EmuWindow_SDL2::SetWindowIcon() {
+    // yuzu_icon and yuzu_icon_size are provided by the included yuzu_cmd/yuzu_icon.h.
+    // SDL_RWFromConstMem accepts a const void*, so the array is passed without a cast.
+    SDL_RWops* const yuzu_icon_stream =
+        SDL_RWFromConstMem(yuzu_icon, static_cast<int>(yuzu_icon_size));
+    if (yuzu_icon_stream == nullptr) {
+        LOG_WARNING(Frontend, "Failed to create yuzu icon stream.");
+        return;
+    }
+    // freesrc = 1: SDL closes the stream itself, so it is not released manually here.
+    SDL_Surface* const window_icon = SDL_LoadBMP_RW(yuzu_icon_stream, 1);
+    if (window_icon == nullptr) {
+        LOG_WARNING(Frontend, "Failed to read BMP from stream.");
+        return;
+    }
+    // SDL keeps its own copy of the icon, so the surface is released right after the call.
+    SDL_SetWindowIcon(render_window, window_icon);
+    SDL_FreeSurface(window_icon);
+}
+
 void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) {
     SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second);
 }
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
index a93141240..51a12a6a9 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
@@ -32,6 +32,9 @@ public:
     /// Wait for the next event on the main thread.
     void WaitEvent();
 
+    /// Sets the window icon from the embedded yuzu.bmp data.
+    void SetWindowIcon();
+
 protected:
     /// Called by WaitEvent when a key is pressed or released.
     void OnKeyEvent(int key, u8 state);
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index deddea9ee..a02485c14 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -107,6 +107,8 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(InputCommon::InputSubsystem* input_subsyste
     dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0,
                                     SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL);
 
+    SetWindowIcon();
+
     if (fullscreen) {
         Fullscreen();
     }
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index 3ba657c00..6f9b00461 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -35,6 +35,8 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
         std::exit(EXIT_FAILURE);
     }
 
+    SetWindowIcon();
+
     switch (wm.subsystem) {
 #ifdef SDL_VIDEO_DRIVER_WINDOWS
     case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 0e1f3bdb3..982c41785 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
     // Core is loaded, start the GPU (makes the GPU contexts current to this thread)
     system.GPU().Start();
 
-    system.Renderer().Rasterizer().LoadDiskResources(
+    system.Renderer().ReadRasterizer()->LoadDiskResources(
         system.CurrentProcess()->GetTitleID(), false,
         [](VideoCore::LoadCallbackStage, size_t value, size_t total) {});