Diffstat
-rw-r--r--  src/common/CMakeLists.txt  5
-rw-r--r--  src/common/telemetry.cpp  15
-rw-r--r--  src/common/x64/cpu_detect.cpp  68
-rw-r--r--  src/common/x64/cpu_detect.h  31
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic.cpp  1
-rw-r--r--  src/core/hle/kernel/physical_memory.h  5
-rw-r--r--  src/core/hle/kernel/process.cpp  4
-rw-r--r--  src/core/hle/kernel/vm_manager.cpp  37
-rw-r--r--  src/core/hle/service/nifm/nifm.cpp  20
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.cpp  8
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.h  3
-rw-r--r--  src/core/hle/service/vi/display/vi_display.cpp  27
-rw-r--r--  src/core/hle/service/vi/display/vi_display.h  9
-rw-r--r--  src/core/hle/service/vi/vi.cpp  14
-rw-r--r--  src/core/loader/elf.cpp  3
-rw-r--r--  src/core/loader/kip.cpp  5
-rw-r--r--  src/core/loader/nso.cpp  12
-rw-r--r--  src/core/memory.cpp  36
-rw-r--r--  src/core/memory.h  16
-rw-r--r--  src/video_core/CMakeLists.txt  24
-rw-r--r--  src/video_core/engines/maxwell_3d.h  14
-rw-r--r--  src/video_core/engines/shader_bytecode.h  37
-rw-r--r--  src/video_core/gpu.cpp  7
-rw-r--r--  src/video_core/gpu.h  5
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp  1
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp  11
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp  50
-rw-r--r--  src/video_core/renderer_opengl/gl_state.cpp  1
-rw-r--r--  src/video_core/renderer_opengl/gl_state.h  3
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp  16
-rw-r--r--  src/video_core/renderer_opengl/utils.cpp  17
-rw-r--r--  src/video_core/renderer_opengl/utils.h  14
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp  18
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h  10
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp  9
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h  2
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h  72
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp  201
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h  107
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp  339
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h  77
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp  112
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h  66
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp  89
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.h  56
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp  271
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h  90
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.cpp  158
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.h  72
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp  395
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h  200
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp  1141
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h  263
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.cpp  100
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.h  97
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.cpp  6
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp  15
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.cpp  34
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.h  17
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h  1
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.cpp  142
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.h  44
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp  475
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h  239
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp  57
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h  86
-rw-r--r--  src/video_core/shader/control_flow.cpp  2
-rw-r--r--  src/video_core/shader/decode/memory.cpp  103
-rw-r--r--  src/video_core/shader/decode/texture.cpp  16
-rw-r--r--  src/video_core/shader/node.h  28
-rw-r--r--  src/video_core/shader/shader_ir.cpp  6
-rw-r--r--  src/video_core/shader/shader_ir.h  10
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp  2
-rw-r--r--  src/video_core/texture_cache/surface_params.h  9
-rw-r--r--  src/yuzu/bootmanager.cpp  11
-rw-r--r--  src/yuzu/configuration/configure_gamelist.cpp  53
-rw-r--r--  src/yuzu/configuration/configure_gamelist.h  3
-rw-r--r--  src/yuzu/configuration/configure_hotkeys.cpp  1
-rw-r--r--  src/yuzu/game_list_p.h  11
-rw-r--r--  src/yuzu/main.cpp  16
-rw-r--r--  src/yuzu/main.ui  3
81 files changed, 5208 insertions, 646 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9b0c3db68..9afc6105d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -15,6 +15,10 @@ endif ()
if (DEFINED ENV{DISPLAYVERSION})
set(DISPLAY_VERSION $ENV{DISPLAYVERSION})
endif ()
+
+# Pass the path to git to GenerateSCMRev.cmake as well
+find_package(Git QUIET)
+
add_custom_command(OUTPUT scm_rev.cpp
COMMAND ${CMAKE_COMMAND}
-DSRC_DIR="${CMAKE_SOURCE_DIR}"
@@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp
-DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}"
-DBUILD_TAG="${BUILD_TAG}"
-DBUILD_ID="${DISPLAY_VERSION}"
+ -DGIT_EXECUTABLE="${GIT_EXECUTABLE}"
-P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
DEPENDS
# WARNING! It was too much work to try and make a common location for this list,
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index f53a8d193..200c6489a 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -44,20 +44,6 @@ template class Field<std::string>;
template class Field<const char*>;
template class Field<std::chrono::microseconds>;
-#ifdef ARCHITECTURE_x86_64
-static const char* CpuVendorToStr(Common::CPUVendor vendor) {
- switch (vendor) {
- case Common::CPUVendor::INTEL:
- return "Intel";
- case Common::CPUVendor::AMD:
- return "Amd";
- case Common::CPUVendor::OTHER:
- return "Other";
- }
- UNREACHABLE();
-}
-#endif
-
void AppendBuildInfo(FieldCollection& fc) {
const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};
fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty);
@@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {
#ifdef ARCHITECTURE_x86_64
fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
- fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 2dfcd39c8..c9349a6b4 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -3,8 +3,6 @@
// Refer to the license.txt file included.
#include <cstring>
-#include <string>
-#include <thread>
#include "common/common_types.h"
#include "common/x64/cpu_detect.h"
@@ -51,8 +49,6 @@ namespace Common {
static CPUCaps Detect() {
CPUCaps caps = {};
- caps.num_cores = std::thread::hardware_concurrency();
-
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
// yuzu at all anyway
@@ -70,12 +66,6 @@ static CPUCaps Detect() {
__cpuid(cpu_id, 0x80000000);
u32 max_ex_fn = cpu_id[0];
- if (!strcmp(caps.brand_string, "GenuineIntel"))
- caps.vendor = CPUVendor::INTEL;
- else if (!strcmp(caps.brand_string, "AuthenticAMD"))
- caps.vendor = CPUVendor::AMD;
- else
- caps.vendor = CPUVendor::OTHER;
// Set reasonable default brand string even if brand string not available
strcpy(caps.cpu_string, caps.brand_string);
@@ -96,15 +86,9 @@ static CPUCaps Detect() {
caps.sse4_1 = true;
if ((cpu_id[2] >> 20) & 1)
caps.sse4_2 = true;
- if ((cpu_id[2] >> 22) & 1)
- caps.movbe = true;
if ((cpu_id[2] >> 25) & 1)
caps.aes = true;
- if ((cpu_id[3] >> 24) & 1) {
- caps.fxsave_fxrstor = true;
- }
-
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
@@ -129,8 +113,6 @@ static CPUCaps Detect() {
}
}
- caps.flush_to_zero = caps.sse;
-
if (max_ex_fn >= 0x80000004) {
// Extract CPU model string
__cpuid(cpu_id, 0x80000002);
@@ -144,14 +126,8 @@ static CPUCaps Detect() {
if (max_ex_fn >= 0x80000001) {
// Check for more features
__cpuid(cpu_id, 0x80000001);
- if (cpu_id[2] & 1)
- caps.lahf_sahf_64 = true;
- if ((cpu_id[2] >> 5) & 1)
- caps.lzcnt = true;
if ((cpu_id[2] >> 16) & 1)
caps.fma4 = true;
- if ((cpu_id[3] >> 29) & 1)
- caps.long_mode = true;
}
return caps;
@@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {
return caps;
}
-std::string GetCPUCapsString() {
- auto caps = GetCPUCaps();
-
- std::string sum(caps.cpu_string);
- sum += " (";
- sum += caps.brand_string;
- sum += ")";
-
- if (caps.sse)
- sum += ", SSE";
- if (caps.sse2) {
- sum += ", SSE2";
- if (!caps.flush_to_zero)
- sum += " (without DAZ)";
- }
-
- if (caps.sse3)
- sum += ", SSE3";
- if (caps.ssse3)
- sum += ", SSSE3";
- if (caps.sse4_1)
- sum += ", SSE4.1";
- if (caps.sse4_2)
- sum += ", SSE4.2";
- if (caps.avx)
- sum += ", AVX";
- if (caps.avx2)
- sum += ", AVX2";
- if (caps.bmi1)
- sum += ", BMI1";
- if (caps.bmi2)
- sum += ", BMI2";
- if (caps.fma)
- sum += ", FMA";
- if (caps.aes)
- sum += ", AES";
- if (caps.movbe)
- sum += ", MOVBE";
- if (caps.long_mode)
- sum += ", 64-bit support";
-
- return sum;
-}
-
} // namespace Common
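For reference, the three-part AVX check that the comment in Detect() above alludes to (the AVX bit in CPUID, the OSXSAVE bit in CPUID, and an XGETBV query of the OS-enabled state) looks roughly like the following sketch. It assumes MSVC-style __cpuid/_xgetbv intrinsics rather than yuzu's own wrappers.

#include <intrin.h>

static bool DetectAvxSketch() {
    int regs[4];
    __cpuid(regs, 1);
    const bool has_avx = ((regs[2] >> 28) & 1) != 0;     // CPUID.1:ECX.AVX
    const bool has_osxsave = ((regs[2] >> 27) & 1) != 0; // CPUID.1:ECX.OSXSAVE
    if (!has_avx || !has_osxsave) {
        return false;
    }
    // Ask the OS (via XGETBV) whether it saves and restores XMM and YMM state.
    const unsigned long long xcr0 = _xgetbv(0);
    return (xcr0 & 0x6) == 0x6;
}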
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 0af3a8adb..20f2ba234 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -4,23 +4,12 @@
#pragma once
-#include <string>
-
namespace Common {
-/// x86/x64 CPU vendors that may be detected by this module
-enum class CPUVendor {
- INTEL,
- AMD,
- OTHER,
-};
-
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
- CPUVendor vendor;
char cpu_string[0x21];
char brand_string[0x41];
- int num_cores;
bool sse;
bool sse2;
bool sse3;
@@ -35,20 +24,6 @@ struct CPUCaps {
bool fma;
bool fma4;
bool aes;
-
- // Support for the FXSAVE and FXRSTOR instructions
- bool fxsave_fxrstor;
-
- bool movbe;
-
- // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
- // are automatically set to (signed) zero.
- bool flush_to_zero;
-
- // Support for LAHF and SAHF instructions in 64-bit mode
- bool lahf_sahf_64;
-
- bool long_mode;
};
/**
@@ -57,10 +32,4 @@ struct CPUCaps {
*/
const CPUCaps& GetCPUCaps();
-/**
- * Gets a string summary of the name and supported capabilities of the host CPU
- * @return String summary
- */
-std::string GetCPUCapsString();
-
} // namespace Common
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index f8c7f0efd..e825c0526 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
config.page_table_address_space_bits = address_space_bits;
config.silently_mirror_page_table = false;
+ config.absolute_offset_page_table = true;
// Multi-process state
config.processor_id = core_index;
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
index 090565310..b689e8e8b 100644
--- a/src/core/hle/kernel/physical_memory.h
+++ b/src/core/hle/kernel/physical_memory.h
@@ -14,6 +14,9 @@ namespace Kernel {
// - Second to ensure all host backing memory used is aligned to 256 bytes due
// to strict alignment restrictions on GPU memory.
-using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+class PhysicalMemory final : public PhysicalMemoryVector {
+ using PhysicalMemoryVector::PhysicalMemoryVector;
+};
} // namespace Kernel
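The wrapper keeps the 256-byte alignment guarantee of the allocator while making PhysicalMemory a distinct type from std::vector<u8>, which lets the new Memory::MapMemoryRegion overload later in this diff accept a PhysicalMemory& directly. A minimal alignment sketch, with a hypothetical size:

#include <cassert>
#include <cstdint>

void AlignmentSketch() {
    Kernel::PhysicalMemory block(0x1000);
    // data() is 256-byte aligned via Common::AlignmentAllocator<u8, 256>.
    assert(reinterpret_cast<std::uintptr_t>(block.data()) % 256 == 0);
}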
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 12ea4ebe3..b9035a0be 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) {
}
void Process::LoadModule(CodeSet module_, VAddr base_addr) {
+ code_memory_size += module_.memory.size();
+
const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
@@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
-
- code_memory_size += module_.memory.size();
}
Process::Process(Core::System& system)
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index a9a20ef76..0b3500fce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <cstring>
#include <iterator>
#include <utility>
#include "common/alignment.h"
@@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
// If necessary, expand backing vector to cover new heap extents in
// the case of allocating. Otherwise, shrink the backing memory,
// if a smaller heap has been requested.
- const u64 old_heap_size = GetCurrentHeapSize();
- if (size > old_heap_size) {
- const u64 alloc_size = size - old_heap_size;
-
- heap_memory->insert(heap_memory->end(), alloc_size, 0);
- RefreshMemoryBlockMappings(heap_memory.get());
- } else if (size < old_heap_size) {
- heap_memory->resize(size);
- heap_memory->shrink_to_fit();
-
- RefreshMemoryBlockMappings(heap_memory.get());
- }
+ heap_memory->resize(size);
+ heap_memory->shrink_to_fit();
+ RefreshMemoryBlockMappings(heap_memory.get());
heap_end = heap_region_base + size;
ASSERT(GetCurrentHeapSize() == heap_memory->size());
@@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
// Always merge allocated memory blocks, even when they don't share the same backing block.
if (left.type == VMAType::AllocatedMemoryBlock &&
(left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
- const auto right_begin = right.backing_block->begin() + right.offset;
- const auto right_end = right_begin + right.size;
// Check if we can save work.
if (left.offset == 0 && left.size == left.backing_block->size()) {
// Fast case: left is an entire backing block.
- left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
+ left.backing_block->resize(left.size + right.size);
+ std::memcpy(left.backing_block->data() + left.size,
+ right.backing_block->data() + right.offset, right.size);
} else {
// Slow case: make a new memory block for left and right.
- const auto left_begin = left.backing_block->begin() + left.offset;
- const auto left_end = left_begin + left.size;
- const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
- const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
-
auto new_memory = std::make_shared<PhysicalMemory>();
- new_memory->reserve(left_size + right_size);
- new_memory->insert(new_memory->end(), left_begin, left_end);
- new_memory->insert(new_memory->end(), right_begin, right_end);
+ new_memory->resize(left.size + right.size);
+ std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size);
+ std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset,
+ right.size);
left.backing_block = std::move(new_memory);
left.offset = 0;
@@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
memory.UnmapRegion(page_table, vma.base, vma.size);
break;
case VMAType::AllocatedMemoryBlock:
- memory.MapMemoryRegion(page_table, vma.base, vma.size,
- vma.backing_block->data() + vma.offset);
+ memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset);
break;
case VMAType::BackingMemory:
memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 2e53b3221..767158444 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -9,6 +9,7 @@
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/service.h"
+#include "core/settings.h"
namespace Service::NIFM {
@@ -86,7 +87,12 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
- rb.PushEnum(RequestState::Connected);
+
+ if (Settings::values.bcat_backend == "none") {
+ rb.PushEnum(RequestState::NotSubmitted);
+ } else {
+ rb.PushEnum(RequestState::Connected);
+ }
}
void GetResult(Kernel::HLERequestContext& ctx) {
@@ -194,14 +200,22 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
- rb.Push<u8>(1);
+ if (Settings::values.bcat_backend == "none") {
+ rb.Push<u8>(0);
+ } else {
+ rb.Push<u8>(1);
+ }
}
void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NIFM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
- rb.Push<u8>(1);
+ if (Settings::values.bcat_backend == "none") {
+ rb.Push<u8>(0);
+ } else {
+ rb.Push<u8>(1);
+ }
}
Core::System& system;
};
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 52623cf89..62752e419 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return layer_id;
}
+void NVFlinger::CloseLayer(u64 layer_id) {
+ for (auto& display : displays) {
+ display.CloseLayer(layer_id);
+ }
+}
+
std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
const auto* const layer = FindLayer(display_id, layer_id);
@@ -192,7 +198,7 @@ void NVFlinger::Compose() {
const auto& igbp_buffer = buffer->get().igbp_buffer;
- const auto& gpu = system.GPU();
+ auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index e3cc14bdc..57a21f33b 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -54,6 +54,9 @@ public:
/// If an invalid display ID is specified, then an empty optional is returned.
std::optional<u64> CreateLayer(u64 display_id);
+ /// Closes a layer on all displays for the given layer ID.
+ void CloseLayer(u64 layer_id);
+
/// Finds the buffer queue ID of the specified layer in the specified display.
///
/// If an invalid display ID or layer ID is provided, then an empty optional is returned.
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
index cd18c1610..5a202ac81 100644
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
- return layers.at(index);
+ return *layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
- return layers.at(index);
+ return *layers.at(index);
}
std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
@@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
- layers.emplace_back(id, buffer_queue);
+ layers.emplace_back(std::make_shared<Layer>(id, buffer_queue));
+}
+
+void Display::CloseLayer(u64 id) {
+ layers.erase(
+ std::remove_if(layers.begin(), layers.end(),
+ [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }),
+ layers.end());
}
Layer* Display::FindLayer(u64 id) {
- const auto itr = std::find_if(layers.begin(), layers.end(),
- [id](const VI::Layer& layer) { return layer.GetID() == id; });
+ const auto itr =
+ std::find_if(layers.begin(), layers.end(),
+ [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
- return &*itr;
+ return itr->get();
}
const Layer* Display::FindLayer(u64 id) const {
- const auto itr = std::find_if(layers.begin(), layers.end(),
- [id](const VI::Layer& layer) { return layer.GetID() == id; });
+ const auto itr =
+ std::find_if(layers.begin(), layers.end(),
+ [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
- return &*itr;
+ return itr->get();
}
} // namespace Service::VI
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
index 8bb966a85..a3855d8cd 100644
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -4,6 +4,7 @@
#pragma once
+#include <memory>
#include <string>
#include <vector>
@@ -69,6 +70,12 @@ public:
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
+ /// Closes and removes a layer from this display with the given ID.
+ ///
+ /// @param id The ID assigned to the layer to close.
+ ///
+ void CloseLayer(u64 id);
+
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
@@ -91,7 +98,7 @@ private:
u64 id;
std::string name;
- std::vector<Layer> layers;
+ std::vector<std::shared_ptr<Layer>> layers;
Kernel::EventPair vsync_event;
};
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 651c89dc0..519da74e0 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -1066,6 +1066,18 @@ private:
rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
}
+ void CloseLayer(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto layer_id{rp.Pop<u64>()};
+
+ LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id);
+
+ nv_flinger->CloseLayer(layer_id);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ }
+
void CreateStrayLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u32 flags = rp.Pop<u32>();
@@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService(
{1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"},
{1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
{2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
- {2021, nullptr, "CloseLayer"},
+ {2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"},
{2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
{2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
{2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index f1795fdd6..8908e5328 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
codeset_segment->addr = segment_addr;
codeset_segment->size = aligned_size;
- memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz);
+ std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i),
+ p->p_filesz);
current_image_position += aligned_size;
}
}
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 474b55cb1..092103abe 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstring>
#include "core/file_sys/kernel_executable.h"
#include "core/file_sys/program_metadata.h"
#include "core/gdbstub/gdbstub.h"
@@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
segment.addr = offset;
segment.offset = offset;
segment.size = PageAlignSize(static_cast<u32>(data.size()));
- program_image.resize(offset);
- program_image.insert(program_image.end(), data.begin(), data.end());
+ program_image.resize(offset + data.size());
+ std::memcpy(program_image.data() + offset, data.data(), data.size());
};
load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset());
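Both this loader and the NSO loader below switch from resize-then-insert to writing each segment at its absolute offset within the program image. A self-contained sketch of that pattern, using hypothetical names and a plain std::vector in place of Kernel::PhysicalMemory:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

void CopySegmentAt(std::vector<std::uint8_t>& image, std::size_t offset,
                   const std::vector<std::uint8_t>& data) {
    // Grow the image to cover the end of the segment, then copy it in place.
    image.resize(offset + data.size());
    std::memcpy(image.data() + offset, data.data(), data.size());
}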
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index f629892ae..515c5accb 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cinttypes>
+#include <cstring>
#include <vector>
#include "common/common_funcs.h"
@@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
if (nso_header.IsSegmentCompressed(i)) {
data = DecompressSegment(data, nso_header.segments[i]);
}
- program_image.resize(nso_header.segments[i].location);
- program_image.insert(program_image.end(), data.begin(), data.end());
+ program_image.resize(nso_header.segments[i].location + data.size());
+ std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
+ data.size());
codeset.segments[i].addr = nso_header.segments[i].location;
codeset.segments[i].offset = nso_header.segments[i].location;
codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
@@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
std::vector<u8> pi_header;
pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
- pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
- program_image.end());
+ pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(),
+ program_image.data() + program_image.size());
pi_header = pm->PatchNSO(pi_header, file.GetName());
- std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
+ std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
}
// Apply cheats if they exist and the program has a valid title ID
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 91bf07a92..f0888327f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -14,6 +14,7 @@
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
+#include "core/hle/kernel/physical_memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
@@ -38,6 +39,11 @@ struct Memory::Impl {
system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
}
+ void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+ Kernel::PhysicalMemory& memory, VAddr offset) {
+ MapMemoryRegion(page_table, base, size, memory.data() + offset);
+ }
+
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
@@ -146,7 +152,7 @@ struct Memory::Impl {
u8* GetPointer(const VAddr vaddr) {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
- return page_pointer + (vaddr & PAGE_MASK);
+ return page_pointer + vaddr;
}
if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
@@ -229,7 +235,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
- const u8* const src_ptr = page_table.pointers[page_index] + page_offset;
+ const u8* const src_ptr =
+ page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -276,7 +283,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
- u8* const dest_ptr = page_table.pointers[page_index] + page_offset;
+ u8* const dest_ptr =
+ page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -322,7 +330,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
- u8* dest_ptr = page_table.pointers[page_index] + page_offset;
+ u8* dest_ptr =
+ page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memset(dest_ptr, 0, copy_amount);
break;
}
@@ -368,7 +377,8 @@ struct Memory::Impl {
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
- const u8* src_ptr = page_table.pointers[page_index] + page_offset;
+ const u8* src_ptr =
+ page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
@@ -446,7 +456,8 @@ struct Memory::Impl {
page_type = Common::PageType::Unmapped;
} else {
page_type = Common::PageType::Memory;
- current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
+ current_page_table->pointers[vaddr >> PAGE_BITS] =
+ pointer - (vaddr & ~PAGE_MASK);
}
break;
}
@@ -493,7 +504,9 @@ struct Memory::Impl {
memory);
} else {
while (base != end) {
- page_table.pointers[base] = memory;
+ page_table.pointers[base] = memory - (base << PAGE_BITS);
+ ASSERT_MSG(page_table.pointers[base],
+ "memory mapping base yield a nullptr within the table");
base += 1;
memory += PAGE_SIZE;
@@ -518,7 +531,7 @@ struct Memory::Impl {
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
- std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
+ std::memcpy(&value, &page_pointer[vaddr], sizeof(T));
return value;
}
@@ -559,7 +572,7 @@ struct Memory::Impl {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
- std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
+ std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
return;
}
@@ -594,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) {
impl->SetCurrentPageTable(process);
}
+void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+ Kernel::PhysicalMemory& memory, VAddr offset) {
+ impl->MapMemoryRegion(page_table, base, size, memory, offset);
+}
+
void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
impl->MapMemoryRegion(page_table, base, size, target);
}
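The recurring "+ (page_index << PAGE_BITS)" and "- (vaddr & ~PAGE_MASK)" adjustments above implement an absolute-offset page table, which appears to pair with config.absolute_offset_page_table set in the dynarmic change earlier in this diff: each pointers[] entry stores the host pointer minus the page's base virtual address, so a lookup needs only one addition. A minimal sketch of the lookup, assuming the u8/VAddr aliases and Common::PageTable fields shown here:

u8* GetHostPointerSketch(const Common::PageTable& table, VAddr vaddr) {
    // Entries are pre-biased by their page's base VAddr, so adding the full
    // virtual address (not just the page offset) yields the host pointer.
    u8* const biased = table.pointers[vaddr >> PAGE_BITS];
    return biased != nullptr ? biased + vaddr : nullptr;
}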
diff --git a/src/core/memory.h b/src/core/memory.h
index 1428a6d60..8913a9da4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -19,8 +19,9 @@ class System;
}
namespace Kernel {
+class PhysicalMemory;
class Process;
-}
+} // namespace Kernel
namespace Memory {
@@ -66,6 +67,19 @@ public:
void SetCurrentPageTable(Kernel::Process& process);
/**
+ * Maps a physical buffer onto a region of the emulated process address space.
+ *
+ * @param page_table The page table of the emulated process.
+ * @param base The address to start mapping at. Must be page-aligned.
+ * @param size The amount of bytes to map. Must be page-aligned.
+ * @param memory Physical buffer with the memory backing the mapping. Must be of length
+ * at least `size + offset`.
+ * @param offset The offset within the physical memory. Must be page-aligned.
+ */
+ void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+ Kernel::PhysicalMemory& memory, VAddr offset);
+
+ /**
* Maps an allocated buffer onto a region of the emulated process address space.
*
* @param page_table The page table of the emulated process.
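A hypothetical call site for the new overload, with placeholder variables that satisfy the alignment requirements stated in the comment above:

// 'block' is a Kernel::PhysicalMemory of length at least size + offset;
// base, size and offset are all page-aligned.
memory.MapMemoryRegion(page_table, base, size, block, offset);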
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 65d7b9f93..12c46e86f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -153,14 +153,29 @@ if (ENABLE_VULKAN)
renderer_vulkan/fixed_pipeline_state.h
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
+ renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
+ renderer_vulkan/vk_compute_pass.cpp
+ renderer_vulkan/vk_compute_pass.h
+ renderer_vulkan/vk_compute_pipeline.cpp
+ renderer_vulkan/vk_compute_pipeline.h
+ renderer_vulkan/vk_descriptor_pool.cpp
+ renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
+ renderer_vulkan/vk_graphics_pipeline.cpp
+ renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
+ renderer_vulkan/vk_pipeline_cache.cpp
+ renderer_vulkan/vk_pipeline_cache.h
+ renderer_vulkan/vk_rasterizer.cpp
+ renderer_vulkan/vk_rasterizer.h
+ renderer_vulkan/vk_renderpass_cache.cpp
+ renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_sampler_cache.cpp
@@ -169,12 +184,19 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_shader_decompiler.cpp
renderer_vulkan/vk_shader_decompiler.h
+ renderer_vulkan/vk_shader_util.cpp
+ renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_staging_buffer_pool.cpp
renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
- renderer_vulkan/vk_swapchain.h)
+ renderer_vulkan/vk_swapchain.h
+ renderer_vulkan/vk_texture_cache.cpp
+ renderer_vulkan/vk_texture_cache.h
+ renderer_vulkan/vk_update_descriptor.cpp
+ renderer_vulkan/vk_update_descriptor.h
+ )
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index a35e7a195..ee79260fc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1018,7 +1018,14 @@ public:
}
} instanced_arrays;
- INSERT_UNION_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x4);
+
+ union {
+ BitField<0, 1, u32> enable;
+ BitField<4, 8, u32> unk4;
+ } vp_point_size;
+
+ INSERT_UNION_PADDING_WORDS(1);
Cull cull;
@@ -1271,8 +1278,6 @@ public:
} dirty{};
- std::array<u8, Regs::NUM_REGS> dirty_pointers{};
-
/// Reads a register value located at the input method address
u32 GetRegisterValue(u32 method) const;
@@ -1367,6 +1372,8 @@ private:
bool execute_on{true};
+ std::array<u8, Regs::NUM_REGS> dirty_pointers{};
+
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
+ASSERT_REG_POSITION(vp_point_size, 0x644);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 57b57c647..6f98bd827 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
Trunc = 11,
};
+enum class AtomicOp : u64 {
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ Exch = 8,
+};
+
enum class UniformType : u64 {
UnsignedByte = 0,
SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
Bits128 = 6,
};
+enum class AtomicType : u64 {
+ U32 = 0,
+ S32 = 1,
+ U64 = 2,
+ S64 = 3,
+};
+
enum class IMinMaxExchange : u64 {
None = 0,
XLo = 1,
@@ -939,6 +958,16 @@ union Instruction {
} stg;
union {
+ BitField<52, 4, AtomicOp> operation;
+ BitField<28, 2, AtomicType> type;
+ BitField<30, 22, s64> offset;
+
+ s32 GetImmediateOffset() const {
+ return static_cast<s32>(offset << 2);
+ }
+ } atoms;
+
+ union {
BitField<32, 1, PhysicalAttributeDirection> direction;
BitField<47, 3, AttributeSize> size;
BitField<20, 11, u64> address;
@@ -1659,9 +1688,10 @@ public:
ST_A,
ST_L,
ST_S,
- ST, // Store in generic memory
- STG, // Store in global memory
- AL2P, // Transforms attribute memory into physical memory
+ ST, // Store in generic memory
+ STG, // Store in global memory
+ ATOMS, // Atomic operation on shared memory
+ AL2P, // Transforms attribute memory into physical memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
@@ -1964,6 +1994,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+ INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 095660115..b9c5c41a2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
-void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// A synced GPU is always in sync
if (!is_async) {
return;
}
MICROPROFILE_SCOPE(GPU_wait);
- while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
- }
+ std::unique_lock lock{sync_mutex};
+ sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex};
+ sync_cv.notify_all();
if (!syncpt_interrupts[syncpoint_id].empty()) {
u32 value = syncpoints[syncpoint_id].load();
auto it = syncpt_interrupts[syncpoint_id].begin();
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ecc338ae9..b648317bb 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
#include <array>
#include <atomic>
+#include <condition_variable>
#include <list>
#include <memory>
#include <mutex>
@@ -181,7 +182,7 @@ public:
virtual void WaitIdle() const = 0;
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
- void WaitFence(u32 syncpoint_id, u32 value) const;
+ void WaitFence(u32 syncpoint_id, u32 value);
void IncrementSyncPoint(u32 syncpoint_id);
@@ -312,6 +313,8 @@ private:
std::mutex sync_mutex;
+ std::condition_variable sync_cv;
+
const bool is_async;
};
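Taken together, the gpu.cpp and gpu.h changes replace the busy-wait on a syncpoint with a condition-variable wait. A self-contained sketch of that pattern (hypothetical SyncpointSketch type, not yuzu's API):

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>

struct SyncpointSketch {
    std::atomic<std::uint32_t> counter{0};
    std::mutex mutex;
    std::condition_variable cv;

    // Block until the counter reaches 'value', as GPU::WaitFence now does.
    void Wait(std::uint32_t value) {
        std::unique_lock lock{mutex};
        cv.wait(lock, [&] { return counter.load() >= value; });
    }

    // Increment and wake all waiters, as GPU::IncrementSyncPoint now does.
    void Increment() {
        ++counter;
        std::lock_guard lock{mutex};
        cv.notify_all();
    }
};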
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 672051102..c428f06e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {
const auto& regs = system.GPU().Maxwell3D().regs;
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
+ state.point.program_control = regs.vp_point_size.enable != 0;
state.point.size = std::max(1.0f, regs.point_size);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index de742d11c..a4acb3796 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR;
namespace {
-// One UBO is always reserved for emulation values on staged shaders
-constexpr u32 STAGE_RESERVED_UBOS = 1;
-
constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
@@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
if (!code_b.empty()) {
ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
}
- const auto entries = GLShader::GetEntries(ir);
std::string source = fmt::format(R"(// {}
#version 430 core
@@ -314,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() {
CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
- : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache},
- device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier},
- shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} {
+ : RasterizerCacheObject{params.host_ptr}, system{params.system},
+ disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
+ unique_identifier{params.unique_identifier}, shader_type{shader_type},
+ entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
if (!params.precompiled_variants) {
return;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a311dbcfe..2996aaf08 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -751,6 +751,9 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
+ if (const auto amend_index = operation->GetAmendIndex()) {
+ Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
+ }
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -872,6 +875,9 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ if (const auto amend_index = conditional->GetAmendIndex()) {
+ Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
+ }
// It's invalid to call conditional on nested nodes, use an operation instead
code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
++code.scope;
@@ -1850,6 +1856,16 @@ private:
Type::Uint};
}
+ template <const std::string_view& opname, Type type>
+ Expression Atomic(Operation operation) {
+ ASSERT(stage == ShaderType::Compute);
+ auto& smem = std::get<SmemNode>(*operation[0]);
+
+ return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+ Visit(operation[1]).As(type)),
+ type};
+ }
+
Expression Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -2188,6 +2204,8 @@ private:
&GLSLDecompiler::AtomicImage<Func::Xor>,
&GLSLDecompiler::AtomicImage<Func::Exchange>,
+ &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
&GLSLDecompiler::PushFlowStack,
@@ -2307,7 +2325,7 @@ public:
explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
void operator()(const ExprAnd& expr) {
- inner += "( ";
+ inner += '(';
std::visit(*this, *expr.operand1);
inner += " && ";
std::visit(*this, *expr.operand2);
@@ -2315,7 +2333,7 @@ public:
}
void operator()(const ExprOr& expr) {
- inner += "( ";
+ inner += '(';
std::visit(*this, *expr.operand1);
inner += " || ";
std::visit(*this, *expr.operand2);
@@ -2333,28 +2351,7 @@ public:
}
void operator()(const ExprCondCode& expr) {
- const Node cc = decomp.ir.GetConditionCode(expr.cc);
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
- const auto index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- target = "false";
- break;
- case Tegra::Shader::Pred::UnusedIndex:
- target = "true";
- break;
- default:
- target = decomp.GetPredicate(index);
- break;
- }
- } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
- target = decomp.GetInternalFlag(flag->GetFlag());
- } else {
- UNREACHABLE();
- }
- inner += target;
+ inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
}
void operator()(const ExprVar& expr) {
@@ -2366,8 +2363,7 @@ public:
}
void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
- inner +=
- "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
+ inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
}
const std::string& GetResult() const {
@@ -2375,8 +2371,8 @@ public:
}
private:
- std::string inner;
GLSLDecompiler& decomp;
+ std::string inner;
};
class ASTDecompiler {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index df2e2395a..cc185e9e1 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {
}
void OpenGLState::ApplyPointSize() {
+ Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index fb180f302..678e5cd89 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -131,7 +131,8 @@ public:
std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
struct {
- float size = 1.0f; // GL_POINT_SIZE
+ bool program_control = false; // GL_PROGRAM_POINT_SIZE
+ GLfloat size = 1.0f; // GL_POINT_SIZE
} point;
struct {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b790b0ef4..e95eb069e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -44,7 +44,7 @@ struct FormatTuple {
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
- {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
- {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S
+ {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
- {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
+ u8* const mip_data = staging_buffer.data() + mip_offset;
+ const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
if (is_compressed) {
- glGetCompressedTextureImage(texture.handle, level,
- static_cast<GLsizei>(params.GetHostMipmapSize(level)),
- staging_buffer.data() + mip_offset);
+ glGetCompressedTextureImage(texture.handle, level, size, mip_data);
} else {
- glGetTextureImage(texture.handle, level, format, type,
- static_cast<GLsizei>(params.GetHostMipmapSize(level)),
- staging_buffer.data() + mip_offset);
+ glGetTextureImage(texture.handle, level, format, type, size, mip_data);
}
}
}
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 9770dda1c..ac99e6385 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -6,16 +6,20 @@
#include <vector>
#include <fmt/format.h>
-
#include <glad/glad.h>
-#include "common/assert.h"
#include "common/common_types.h"
-#include "common/scope_exit.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
+struct VertexArrayPushBuffer::Entry {
+ GLuint binding_index{};
+ const GLuint* buffer{};
+ GLintptr offset{};
+ GLsizei stride{};
+};
+
VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
@@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() {
}
}
+struct BindBuffersRangePushBuffer::Entry {
+ GLuint binding;
+ const GLuint* buffer;
+ GLintptr offset;
+ GLsizeiptr size;
+};
+
BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index d56153fe7..3ad7c02d4 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -26,12 +26,7 @@ public:
void Bind();
private:
- struct Entry {
- GLuint binding_index{};
- const GLuint* buffer{};
- GLintptr offset{};
- GLsizei stride{};
- };
+ struct Entry;
GLuint vao{};
const GLuint* index_buffer{};
@@ -50,12 +45,7 @@ public:
void Bind();
private:
- struct Entry {
- GLuint binding;
- const GLuint* buffer;
- GLintptr offset;
- GLsizeiptr size;
- };
+ struct Entry;
GLenum target;
std::vector<Entry> entries;
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5a490f6ef..4e3ff231e 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -109,6 +109,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
+ const auto& clip = regs.view_volume_clip_control;
+ const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
+
Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) {
@@ -119,8 +122,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
}
const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
- return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc,
- regs.cull.cull_face, front_face);
+ return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
+ depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
+ front_face);
}
} // Anonymous namespace
@@ -222,15 +226,17 @@ bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const
std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
return static_cast<std::size_t>(cull_enable) ^
(static_cast<std::size_t>(depth_bias_enable) << 1) ^
- (static_cast<std::size_t>(ndc_minus_one_to_one) << 2) ^
+ (static_cast<std::size_t>(depth_clamp_enable) << 2) ^
+ (static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
(static_cast<std::size_t>(cull_face) << 24) ^
(static_cast<std::size_t>(front_face) << 48);
}
bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
- return std::tie(cull_enable, depth_bias_enable, ndc_minus_one_to_one, cull_face, front_face) ==
- std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.ndc_minus_one_to_one, rhs.cull_face,
- rhs.front_face);
+ return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one,
+ cull_face, front_face) ==
+ std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
+ rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
}
std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 04152c0d4..87056ef37 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -170,15 +170,17 @@ struct FixedPipelineState {
};
struct Rasterizer {
- constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one,
- Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face)
+ constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
+ bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
+ Maxwell::Cull::FrontFace front_face)
: cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
- ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{
- front_face} {}
+ depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
+ cull_face{cull_face}, front_face{front_face} {}
Rasterizer() = default;
bool cull_enable;
bool depth_bias_enable;
+ bool depth_clamp_enable;
bool ndc_minus_one_to_one;
Maxwell::Cull::CullFace cull_face;
Maxwell::Cull::FrontFace front_face;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 000e3616d..331808113 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
return {};
}
-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter) {
switch (wrap_mode) {
case Tegra::Texture::WrapMode::Wrap:
@@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
case Tegra::Texture::WrapMode::Border:
return vk::SamplerAddressMode::eClampToBorder;
case Tegra::Texture::WrapMode::Clamp:
- // TODO(Rodrigo): Emulate GL_CLAMP properly
+ if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
+ // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, so we can hack this
+ // by sending an invalid enumeration.
+ return static_cast<vk::SamplerAddressMode>(0xcafe);
+ }
+ // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
switch (filter) {
case Tegra::Texture::TextureFilter::Nearest:
return vk::SamplerAddressMode::eClampToEdge;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 1534b738b..7e9678b7b 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);
vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter);
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
new file mode 100644
index 000000000..a472c5dc9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -0,0 +1,72 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <vector>
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Core {
+class System;
+}
+
+namespace Vulkan {
+
+class VKBlitScreen;
+class VKDevice;
+class VKFence;
+class VKMemoryManager;
+class VKResourceManager;
+class VKSwapchain;
+class VKScheduler;
+class VKImage;
+
+struct VKScreenInfo {
+ VKImage* image{};
+ u32 width{};
+ u32 height{};
+ bool is_srgb{};
+};
+
+class RendererVulkan final : public VideoCore::RendererBase {
+public:
+ explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
+ ~RendererVulkan() override;
+
+ /// Swap buffers (render frame)
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
+
+ /// Initialize the renderer
+ bool Init() override;
+
+ /// Shutdown the renderer
+ void ShutDown() override;
+
+private:
+ std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
+ const vk::DispatchLoaderDynamic& dldi);
+
+ bool PickDevices(const vk::DispatchLoaderDynamic& dldi);
+
+ void Report() const;
+
+ Core::System& system;
+
+ vk::Instance instance;
+ vk::SurfaceKHR surface;
+
+ VKScreenInfo screen_info;
+
+ UniqueDebugUtilsMessengerEXT debug_callback;
+ std::unique_ptr<VKDevice> device;
+ std::unique_ptr<VKSwapchain> swapchain;
+ std::unique_ptr<VKMemoryManager> memory_manager;
+ std::unique_ptr<VKResourceManager> resource_manager;
+ std::unique_ptr<VKScheduler> scheduler;
+ std::unique_ptr<VKBlitScreen> blit_screen;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 46da81aaa..1ba544943 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -2,124 +2,145 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <cstring>
#include <memory>
#include <optional>
#include <tuple>
-#include "common/alignment.h"
#include "common/assert.h"
-#include "core/memory.h"
-#include "video_core/memory_manager.h"
+#include "common/bit_util.h"
+#include "core/core.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
-CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
- std::size_t alignment, u8* host_ptr)
- : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
- alignment{alignment} {}
-
-VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
- Memory::Memory& cpu_memory_,
- VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
- : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
- cpu_memory_} {
- const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
- vk::BufferUsageFlagBits::eIndexBuffer |
- vk::BufferUsageFlagBits::eUniformBuffer;
- const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
- vk::AccessFlagBits::eUniformRead;
- stream_buffer =
- std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
- vk::PipelineStageFlagBits::eAllCommands);
- buffer_handle = stream_buffer->GetBuffer();
-}
+namespace {
-VKBufferCache::~VKBufferCache() = default;
+const auto BufferUsage =
+ vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
+ vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
+
+const auto UploadPipelineStage =
+ vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
+ vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+ vk::PipelineStageFlagBits::eComputeShader;
-u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
- const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
- ASSERT_MSG(cpu_addr, "Invalid GPU address");
-
- // Cache management is a big overhead, so only cache entries with a given size.
- // TODO: Figure out which size is the best for given games.
- cache &= size >= 2048;
-
- u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
- if (cache) {
- const auto entry = TryGet(host_ptr);
- if (entry) {
- if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
- return entry->GetOffset();
- }
- Unregister(entry);
- }
- }
-
- AlignBuffer(alignment);
- const u64 uploaded_offset = buffer_offset;
-
- if (host_ptr == nullptr) {
- return uploaded_offset;
- }
-
- std::memcpy(buffer_ptr, host_ptr, size);
- buffer_ptr += size;
- buffer_offset += size;
-
- if (cache) {
- auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
- alignment, host_ptr);
- Register(entry);
- }
-
- return uploaded_offset;
+const auto UploadAccessBarriers =
+ vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
+ vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
+ vk::AccessFlagBits::eIndexRead;
+
+auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
+ return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
}
-u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
- AlignBuffer(alignment);
- std::memcpy(buffer_ptr, raw_pointer, size);
- const u64 uploaded_offset = buffer_offset;
+} // Anonymous namespace
+
+CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+ CacheAddr cache_addr, std::size_t size)
+ : VideoCommon::BufferBlock{cache_addr, size} {
+ const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
+ BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
+ vk::BufferUsageFlagBits::eTransferDst,
+ vk::SharingMode::eExclusive, 0, nullptr);
- buffer_ptr += size;
- buffer_offset += size;
- return uploaded_offset;
+ const auto& dld{device.GetDispatchLoader()};
+ const auto dev{device.GetLogical()};
+ buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
+ buffer.commit = memory_manager.Commit(*buffer.handle, false);
}
-std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
- AlignBuffer(alignment);
- u8* const uploaded_ptr = buffer_ptr;
- const u64 uploaded_offset = buffer_offset;
+CachedBufferBlock::~CachedBufferBlock() = default;
+
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+ : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
+ CreateStreamBuffer(device,
+ scheduler)},
+ device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+ staging_pool} {}
- buffer_ptr += size;
- buffer_offset += size;
- return {uploaded_ptr, uploaded_offset};
+VKBufferCache::~VKBufferCache() = default;
+
+Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
+ return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
}
-void VKBufferCache::Reserve(std::size_t max_size) {
- bool invalidate;
- std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
- buffer_offset = buffer_offset_base;
+const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
+ return buffer->GetHandle();
+}
+
+const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
+ size = std::max(size, std::size_t(4));
+ const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
+ cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
+ });
+ return &*empty.handle;
+}
- if (invalidate) {
- InvalidateAll();
- }
+void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ const u8* data) {
+ const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+ std::memcpy(staging.commit->Map(size), data, size);
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+ size](auto cmdbuf, auto& dld) {
+ cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
+ cmdbuf.pipelineBarrier(
+ vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+ {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+ VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
+ offset, size)},
+ {}, dld);
+ });
}
-void VKBufferCache::Send() {
- stream_buffer->Send(buffer_offset - buffer_offset_base);
+void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ u8* data) {
+ const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+ size](auto cmdbuf, auto& dld) {
+ cmdbuf.pipelineBarrier(
+ vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+ vk::PipelineStageFlagBits::eComputeShader,
+ vk::PipelineStageFlagBits::eTransfer, {}, {},
+ {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
+ vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
+ VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
+ {}, dld);
+ cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
+ });
+ scheduler.Finish();
+
+ std::memcpy(data, staging.commit->Map(size), size);
}
-void VKBufferCache::AlignBuffer(std::size_t alignment) {
- // Align the offset, not the mapped pointer
- const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
- buffer_ptr += offset_aligned - buffer_offset;
- buffer_offset = offset_aligned;
+void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+ std::size_t dst_offset, std::size_t size) {
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
+ dst_offset, size](auto cmdbuf, auto& dld) {
+ cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
+ cmdbuf.pipelineBarrier(
+ vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+ {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
+ vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
+ VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
+ vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+ VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
+ dst_offset, size)},
+ {}, dld);
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index daa8ccf66..3f38eed0c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -5,105 +5,74 @@
#pragma once
#include <memory>
-#include <tuple>
+#include <unordered_map>
+#include <vector>
#include "common/common_types.h"
-#include "video_core/gpu.h"
+#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
-namespace Memory {
-class Memory;
-}
-
-namespace Tegra {
-class MemoryManager;
+namespace Core {
+class System;
}
namespace Vulkan {
class VKDevice;
-class VKFence;
class VKMemoryManager;
-class VKStreamBuffer;
+class VKScheduler;
-class CachedBufferEntry final : public RasterizerCacheObject {
+class CachedBufferBlock final : public VideoCommon::BufferBlock {
public:
- explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
- u8* host_ptr);
+ explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+ CacheAddr cache_addr, std::size_t size);
+ ~CachedBufferBlock();
- VAddr GetCpuAddr() const override {
- return cpu_addr;
- }
-
- std::size_t GetSizeInBytes() const override {
- return size;
- }
-
- std::size_t GetSize() const {
- return size;
- }
-
- u64 GetOffset() const {
- return offset;
- }
-
- std::size_t GetAlignment() const {
- return alignment;
+ const vk::Buffer* GetHandle() const {
+ return &*buffer.handle;
}
private:
- VAddr cpu_addr{};
- std::size_t size{};
- u64 offset{};
- std::size_t alignment{};
+ VKBuffer buffer;
};
-class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+using Buffer = std::shared_ptr<CachedBufferBlock>;
+
+class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
public:
- explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
- VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
+ explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
- /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
- /// allocated.
- u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
+ const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
- /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
- u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
+protected:
+ void WriteBarrier() override {}
- /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
- std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
+ Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
- /// Reserves a region of memory to be used in subsequent upload/reserve operations.
- void Reserve(std::size_t max_size);
+ const vk::Buffer* ToHandle(const Buffer& buffer) override;
- /// Ensures that the set data is sent to the device.
- void Send();
+ void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ const u8* data) override;
- /// Returns the buffer cache handle.
- vk::Buffer GetBuffer() const {
- return buffer_handle;
- }
+ void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ u8* data) override;
-protected:
- // We do not have to flush this cache as things in it are never modified by us.
- void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+ void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+ std::size_t dst_offset, std::size_t size) override;
private:
- void AlignBuffer(std::size_t alignment);
-
- Tegra::MemoryManager& tegra_memory_manager;
- Memory::Memory& cpu_memory;
-
- std::unique_ptr<VKStreamBuffer> stream_buffer;
- vk::Buffer buffer_handle;
-
- u8* buffer_ptr = nullptr;
- u64 buffer_offset = 0;
- u64 buffer_offset_base = 0;
+ const VKDevice& device;
+ VKMemoryManager& memory_manager;
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_pool;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
new file mode 100644
index 000000000..7bdda3d79
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -0,0 +1,339 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <utility>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Vulkan {
+
+namespace {
+
+// Quad array SPIR-V module. Generated from the "shaders/" directory; see the instructions there.
+constexpr u8 quad_array[] = {
+ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
+ 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+ 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
+ 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+ 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
+ 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
+ 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
+ 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
+ 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+ 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+ 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
+ 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
+ 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
+ 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
+ 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
+ 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
+ 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
+ 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+ 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
+ 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
+ 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+ 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
+ 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
+ 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
+ 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
+ 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
+ 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
+ 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
+ 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
+ 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
+ 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
+ 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
+ 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+ 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
+ 0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
+ 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
+ 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
+ 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
+ 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
+ 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
+ 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+
+// Uint8 SPIR-V module. Generated from the "shaders/" directory.
+constexpr u8 uint8_pass[] = {
+ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
+ 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
+ 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
+ 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
+ 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
+ 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
+ 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
+ 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
+ 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
+ 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
+ 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
+ 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
+ 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
+ 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+ 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+ 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
+ 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
+ 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+ 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
+ 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
+ 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+ 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+ 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
+ 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
+ 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
+ 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
+ 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
+ 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
+ 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
+ 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
+ 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
+ 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
+ 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
+ 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
+ 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
+ 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+
+} // Anonymous namespace
+
+VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
+ const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+ const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
+ const std::vector<vk::PushConstantRange> push_constants,
+ std::size_t code_size, const u8* code) {
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+
+ const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci(
+ {}, static_cast<u32>(bindings.size()), bindings.data());
+ descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld);
+
+ const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout,
+ static_cast<u32>(push_constants.size()),
+ push_constants.data());
+ layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
+
+ if (!templates.empty()) {
+ const vk::DescriptorUpdateTemplateCreateInfo template_ci(
+ {}, static_cast<u32>(templates.size()), templates.data(),
+ vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
+ vk::PipelineBindPoint::eGraphics, *layout, 0);
+ descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
+
+ descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
+ }
+
+ auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
+ std::memcpy(code_copy.get(), code, code_size);
+ const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get());
+ module = dev.createShaderModuleUnique(module_ci, nullptr, dld);
+
+ const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module,
+ "main", nullptr);
+
+ const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0);
+ pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld);
+}
+
+VKComputePass::~VKComputePass() = default;
+
+vk::DescriptorSet VKComputePass::CommitDescriptorSet(
+ VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) {
+ if (!descriptor_template) {
+ return {};
+ }
+ const auto set = descriptor_allocator->Commit(fence);
+ update_descriptor_queue.Send(*descriptor_template, set);
+ return set;
+}
+
+QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool,
+ VKStagingBufferPool& staging_buffer_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue)
+ : VKComputePass(device, descriptor_pool,
+ {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
+ vk::ShaderStageFlagBits::eCompute, nullptr)},
+ {vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer,
+ 0, sizeof(DescriptorUpdateEntry))},
+ {vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))},
+ std::size(quad_array), quad_array),
+ scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
+ update_descriptor_queue{update_descriptor_queue} {}
+
+QuadArrayPass::~QuadArrayPass() = default;
+
+std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
+ const u32 num_triangle_vertices = num_vertices * 6 / 4;
+ const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
+ auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
+
+ update_descriptor_queue.Acquire();
+ update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
+ const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+
+ ASSERT(num_vertices % 4 == 0);
+ const u32 num_quads = num_vertices / 4;
+ scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads,
+ first, set](auto cmdbuf, auto& dld) {
+ constexpr u32 dispatch_size = 1024;
+ cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
+ cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
+ cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first,
+ dld);
+ cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld);
+
+ const vk::BufferMemoryBarrier barrier(
+ vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
+ VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
+ static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32));
+ cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
+ vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
+ });
+ return {*buffer.handle, 0};
+}
+
+Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue)
+ : VKComputePass(device, descriptor_pool,
+ {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
+ vk::ShaderStageFlagBits::eCompute, nullptr),
+ vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1,
+ vk::ShaderStageFlagBits::eCompute, nullptr)},
+ {vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer,
+ 0, sizeof(DescriptorUpdateEntry))},
+ {}, std::size(uint8_pass), uint8_pass),
+ scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
+ update_descriptor_queue{update_descriptor_queue} {}
+
+Uint8Pass::~Uint8Pass() = default;
+
+std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer,
+ u64 src_offset) {
+ const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
+ auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
+
+ update_descriptor_queue.Acquire();
+ update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices);
+ update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
+ const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
+ num_vertices](auto cmdbuf, auto& dld) {
+ constexpr u32 dispatch_size = 1024;
+ cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
+ cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
+ cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld);
+
+ const vk::BufferMemoryBarrier barrier(
+ vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
+ VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
+ static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16));
+ cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
+ vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
+ });
+ return {&*buffer.handle, 0};
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
new file mode 100644
index 000000000..7057eb837
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -0,0 +1,77 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKScheduler;
+class VKStagingBufferPool;
+class VKUpdateDescriptorQueue;
+
+class VKComputePass {
+public:
+ explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
+ const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+ const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
+ const std::vector<vk::PushConstantRange> push_constants,
+ std::size_t code_size, const u8* code);
+ ~VKComputePass();
+
+protected:
+ vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
+ VKFence& fence);
+
+ UniqueDescriptorUpdateTemplate descriptor_template;
+ UniquePipelineLayout layout;
+ UniquePipeline pipeline;
+
+private:
+ UniqueDescriptorSetLayout descriptor_set_layout;
+ std::optional<DescriptorAllocator> descriptor_allocator;
+ UniqueShaderModule module;
+};
+
+class QuadArrayPass final : public VKComputePass {
+public:
+ explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool,
+ VKStagingBufferPool& staging_buffer_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue);
+ ~QuadArrayPass();
+
+ std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first);
+
+private:
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_buffer_pool;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+};
+
+class Uint8Pass final : public VKComputePass {
+public:
+ explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue);
+ ~Uint8Pass();
+
+ std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer,
+ u64 src_offset);
+
+private:
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_buffer_pool;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
new file mode 100644
index 000000000..9d5b8de7a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -0,0 +1,112 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <vector>
+
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Vulkan {
+
+VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ const SPIRVShader& shader)
+ : device{device}, scheduler{scheduler}, entries{shader.entries},
+ descriptor_set_layout{CreateDescriptorSetLayout()},
+ descriptor_allocator{descriptor_pool, *descriptor_set_layout},
+ update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
+ descriptor_template{CreateDescriptorUpdateTemplate()},
+ shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}
+
+VKComputePipeline::~VKComputePipeline() = default;
+
+vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
+ if (!descriptor_template) {
+ return {};
+ }
+ const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+ update_descriptor_queue.Send(*descriptor_template, set);
+ return set;
+}
+
+UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
+ std::vector<vk::DescriptorSetLayoutBinding> bindings;
+ u32 binding = 0;
+ const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
+ // TODO(Rodrigo): Maybe make individual bindings here?
+ for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
+ bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute,
+ nullptr);
+ }
+ };
+ AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
+ AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
+ AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
+ AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
+ AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
+
+ const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
+ {}, static_cast<u32>(bindings.size()), bindings.data());
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
+}
+
+UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const {
+ const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr);
+ const auto dev = device.GetLogical();
+ return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader());
+}
+
+UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const {
+ std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
+ u32 binding = 0;
+ u32 offset = 0;
+ FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
+ if (template_entries.empty()) {
+ // If the shader doesn't use descriptor sets, skip template creation.
+ return UniqueDescriptorUpdateTemplate{};
+ }
+
+ const vk::DescriptorUpdateTemplateCreateInfo template_ci(
+ {}, static_cast<u32>(template_entries.size()), template_entries.data(),
+ vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
+ vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
+}
+
+UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
+ const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data());
+ const auto dev = device.GetLogical();
+ return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader());
+}
+
+UniquePipeline VKComputePipeline::CreatePipeline() const {
+ vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute,
+ *shader_module, "main", nullptr);
+ vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
+ subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
+ if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) {
+ shader_stage_ci.pNext = &subgroup_size_ci;
+ }
+
+ const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0);
+ const auto dev = device.GetLogical();
+ return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader());
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
new file mode 100644
index 000000000..22235c6c9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -0,0 +1,66 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
+class VKComputePipeline final {
+public:
+ explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ const SPIRVShader& shader);
+ ~VKComputePipeline();
+
+ vk::DescriptorSet CommitDescriptorSet();
+
+ vk::Pipeline GetHandle() const {
+ return *pipeline;
+ }
+
+ vk::PipelineLayout GetLayout() const {
+ return *layout;
+ }
+
+ const ShaderEntries& GetEntries() {
+ return entries;
+ }
+
+private:
+ UniqueDescriptorSetLayout CreateDescriptorSetLayout() const;
+
+ UniquePipelineLayout CreatePipelineLayout() const;
+
+ UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const;
+
+ UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const;
+
+ UniquePipeline CreatePipeline() const;
+
+ const VKDevice& device;
+ VKScheduler& scheduler;
+ ShaderEntries entries;
+
+ UniqueDescriptorSetLayout descriptor_set_layout;
+ DescriptorAllocator descriptor_allocator;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+ UniquePipelineLayout layout;
+ UniqueDescriptorUpdateTemplate descriptor_template;
+ UniqueShaderModule shader_module;
+ UniquePipeline pipeline;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
new file mode 100644
index 000000000..cc7c281a0
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -0,0 +1,89 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace Vulkan {
+
+// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
+constexpr std::size_t SETS_GROW_RATE = 0x20;
+
+DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
+ vk::DescriptorSetLayout layout)
+ : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
+
+DescriptorAllocator::~DescriptorAllocator() = default;
+
+vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
+ return *descriptors[CommitResource(fence)];
+}
+
+void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
+ auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin);
+ descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()),
+ std::make_move_iterator(new_sets.end()));
+}
+
+VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
+ : device{device}, active_pool{AllocateNewPool()} {}
+
+VKDescriptorPool::~VKDescriptorPool() = default;
+
+vk::DescriptorPool VKDescriptorPool::AllocateNewPool() {
+ static constexpr u32 num_sets = 0x20000;
+ static constexpr vk::DescriptorPoolSize pool_sizes[] = {
+ {vk::DescriptorType::eUniformBuffer, num_sets * 90},
+ {vk::DescriptorType::eStorageBuffer, num_sets * 60},
+ {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64},
+ {vk::DescriptorType::eCombinedImageSampler, num_sets * 64},
+ {vk::DescriptorType::eStorageImage, num_sets * 40}};
+
+ const vk::DescriptorPoolCreateInfo create_info(
+ vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets,
+ static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes));
+ const auto dev = device.GetLogical();
+ return *pools.emplace_back(
+ dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader()));
+}
+
+std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors(
+ vk::DescriptorSetLayout layout, std::size_t count) {
+ std::vector layout_copies(count, layout);
+ vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count),
+ layout_copies.data());
+
+ std::vector<vk::DescriptorSet> sets(count);
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) {
+ case vk::Result::eSuccess:
+ break;
+ case vk::Result::eErrorOutOfPoolMemory:
+ active_pool = AllocateNewPool();
+ allocate_info.descriptorPool = active_pool;
+ if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) {
+ break;
+ }
+ [[fallthrough]];
+ default:
+ vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique");
+ }
+
+ vk::PoolFree deleter(dev, active_pool, dld);
+ std::vector<UniqueDescriptorSet> unique_sets;
+ unique_sets.reserve(count);
+ for (const auto set : sets) {
+ unique_sets.push_back(UniqueDescriptorSet{set, deleter});
+ }
+ return unique_sets;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
new file mode 100644
index 000000000..a441dbc0f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -0,0 +1,56 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace Vulkan {
+
+class VKDescriptorPool;
+
+class DescriptorAllocator final : public VKFencedPool {
+public:
+ explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout);
+ ~DescriptorAllocator() override;
+
+ DescriptorAllocator(const DescriptorAllocator&) = delete;
+
+ vk::DescriptorSet Commit(VKFence& fence);
+
+protected:
+ void Allocate(std::size_t begin, std::size_t end) override;
+
+private:
+ VKDescriptorPool& descriptor_pool;
+ const vk::DescriptorSetLayout layout;
+
+ std::vector<UniqueDescriptorSet> descriptors;
+};
+
+class VKDescriptorPool final {
+ friend DescriptorAllocator;
+
+public:
+ explicit VKDescriptorPool(const VKDevice& device);
+ ~VKDescriptorPool();
+
+private:
+ vk::DescriptorPool AllocateNewPool();
+
+ std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout,
+ std::size_t count);
+
+ const VKDevice& device;
+
+ std::vector<UniqueDescriptorPool> pools;
+ vk::DescriptorPool active_pool;
+};
+
+} // namespace Vulkan
\ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
new file mode 100644
index 000000000..2e0536bf6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -0,0 +1,271 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/microprofile.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Vulkan {
+
+MICROPROFILE_DECLARE(Vulkan_PipelineCache);
+
+namespace {
+
+vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
+ return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail),
+ MaxwellToVK::StencilOp(face.action_depth_pass),
+ MaxwellToVK::StencilOp(face.action_depth_fail),
+ MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
+}
+
+bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
+ static constexpr std::array unsupported_topologies = {
+ vk::PrimitiveTopology::ePointList,
+ vk::PrimitiveTopology::eLineList,
+ vk::PrimitiveTopology::eTriangleList,
+ vk::PrimitiveTopology::eLineListWithAdjacency,
+ vk::PrimitiveTopology::eTriangleListWithAdjacency,
+ vk::PrimitiveTopology::ePatchList};
+ return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
+ topology) == std::end(unsupported_topologies);
+}
+
+} // Anonymous namespace
+
+VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ VKRenderPassCache& renderpass_cache,
+ const GraphicsPipelineCacheKey& key,
+ const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+ const SPIRVProgram& program)
+ : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
+ descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
+ descriptor_allocator{descriptor_pool, *descriptor_set_layout},
+ update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
+ descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
+ program)},
+ renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
+ key.renderpass_params,
+ program)} {}
+
+VKGraphicsPipeline::~VKGraphicsPipeline() = default;
+
+vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
+ if (!descriptor_template) {
+ return {};
+ }
+ const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+ update_descriptor_queue.Send(*descriptor_template, set);
+ return set;
+}
+
+UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
+ const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
+ const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
+ {}, static_cast<u32>(bindings.size()), bindings.data());
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
+}
+
+UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
+ const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0,
+ nullptr);
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
+}
+
+UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
+ const SPIRVProgram& program) const {
+ std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
+ u32 binding = 0;
+ u32 offset = 0;
+ for (const auto& stage : program) {
+ if (stage) {
+ FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset,
+ template_entries);
+ }
+ }
+ if (template_entries.empty()) {
+ // If the shader doesn't use descriptor sets, skip template creation.
+ return UniqueDescriptorUpdateTemplate{};
+ }
+
+ const vk::DescriptorUpdateTemplateCreateInfo template_ci(
+ {}, static_cast<u32>(template_entries.size()), template_entries.data(),
+ vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
+ vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
+}
+
+std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
+ const SPIRVProgram& program) const {
+ std::vector<UniqueShaderModule> modules;
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
+ const auto& stage = program[i];
+ if (!stage) {
+ continue;
+ }
+ const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32),
+ stage->code.data());
+ modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld));
+ }
+ return modules;
+}
+
+UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
+ const SPIRVProgram& program) const {
+ const auto& vi = fixed_state.vertex_input;
+ const auto& ia = fixed_state.input_assembly;
+ const auto& ds = fixed_state.depth_stencil;
+ const auto& cd = fixed_state.color_blending;
+ const auto& ts = fixed_state.tessellation;
+ const auto& rs = fixed_state.rasterizer;
+
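+    // Bindings with a non-zero divisor are instanced; their divisors are also collected for the
+    // EXT vertex input divisor state chained below.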
+ std::vector<vk::VertexInputBindingDescription> vertex_bindings;
+ std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
+ for (std::size_t i = 0; i < vi.num_bindings; ++i) {
+ const auto& binding = vi.bindings[i];
+ const bool instanced = binding.divisor != 0;
+ const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
+ vertex_bindings.emplace_back(binding.index, binding.stride, rate);
+ if (instanced) {
+ vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
+ }
+ }
+
+ std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
+ const auto& input_attributes = program[0]->entries.attributes;
+ for (std::size_t i = 0; i < vi.num_attributes; ++i) {
+ const auto& attribute = vi.attributes[i];
+ if (input_attributes.find(attribute.index) == input_attributes.end()) {
+            // Skip attributes not used by the vertex shader.
+ continue;
+ }
+ vertex_attributes.emplace_back(attribute.index, attribute.buffer,
+ MaxwellToVK::VertexFormat(attribute.type, attribute.size),
+ attribute.offset);
+ }
+
+ vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
+ {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
+ static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());
+
+ const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
+ static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
+ if (!vertex_binding_divisors.empty()) {
+ vertex_input_ci.pNext = &vertex_input_divisor_ci;
+ }
+
+ const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
+ const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
+ {}, primitive_topology,
+ ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));
+
+ const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);
+
+ const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
+ Maxwell::NumViewports, nullptr);
+
+    // TODO(Rodrigo): Find out what the default register value for front face is
+ const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
+ {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
+ rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
+ rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise,
+ rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);
+
+ const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
+ {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);
+
+ const vk::CompareOp depth_test_compare = ds.depth_test_enable
+ ? MaxwellToVK::ComparisonOp(ds.depth_test_function)
+ : vk::CompareOp::eAlways;
+
+ const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
+ {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
+ ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
+ GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);
+
+ std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
+ const std::size_t num_attachments =
+ std::min(cd.attachments_count, renderpass_params.color_attachments.size());
+ for (std::size_t i = 0; i < num_attachments; ++i) {
+ constexpr std::array component_table{
+ vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
+ vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
+ const auto& blend = cd.attachments[i];
+
+ vk::ColorComponentFlags color_components{};
+ for (std::size_t j = 0; j < component_table.size(); ++j) {
+ if (blend.components[j])
+ color_components |= component_table[j];
+ }
+
+ cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
+ blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
+ MaxwellToVK::BlendFactor(blend.dst_rgb_func),
+ MaxwellToVK::BlendEquation(blend.rgb_equation),
+ MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
+ MaxwellToVK::BlendEquation(blend.a_equation), color_components);
+ }
+ const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
+ static_cast<u32>(num_attachments),
+ cb_attachments.data(), {});
+
+ constexpr std::array dynamic_states = {
+ vk::DynamicState::eViewport, vk::DynamicState::eScissor,
+ vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants,
+ vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask,
+ vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
+ const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
+ {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
+
+ vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
+ subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
+
+ std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
+ std::size_t module_index = 0;
+ for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+ if (!program[stage]) {
+ continue;
+ }
+ const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
+ const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
+ auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
+ *modules[module_index++], "main", nullptr);
+ if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
+ stage_ci.pNext = &subgroup_size_ci;
+ }
+ }
+
+ const vk::GraphicsPipelineCreateInfo create_info(
+ {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
+ &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
+ &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
new file mode 100644
index 000000000..4f5e4ea2d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+
+namespace Vulkan {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+struct GraphicsPipelineCacheKey;
+
+class VKDescriptorPool;
+class VKDevice;
+class VKRenderPassCache;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
+using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
+
+class VKGraphicsPipeline final {
+public:
+ explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ VKRenderPassCache& renderpass_cache,
+ const GraphicsPipelineCacheKey& key,
+ const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+ const SPIRVProgram& program);
+ ~VKGraphicsPipeline();
+
+ vk::DescriptorSet CommitDescriptorSet();
+
+ vk::Pipeline GetHandle() const {
+ return *pipeline;
+ }
+
+ vk::PipelineLayout GetLayout() const {
+ return *layout;
+ }
+
+ vk::RenderPass GetRenderPass() const {
+ return renderpass;
+ }
+
+private:
+ UniqueDescriptorSetLayout CreateDescriptorSetLayout(
+ const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const;
+
+ UniquePipelineLayout CreatePipelineLayout() const;
+
+ UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
+ const SPIRVProgram& program) const;
+
+ std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
+
+ UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params,
+ const SPIRVProgram& program) const;
+
+ const VKDevice& device;
+ VKScheduler& scheduler;
+ const FixedPipelineState fixed_state;
+ const u64 hash;
+
+ UniqueDescriptorSetLayout descriptor_set_layout;
+ DescriptorAllocator descriptor_allocator;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+ UniquePipelineLayout layout;
+ UniqueDescriptorUpdateTemplate descriptor_template;
+ std::vector<UniqueShaderModule> modules;
+
+ vk::RenderPass renderpass;
+ UniquePipeline pipeline;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 0451babbf..9cc9979d0 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -6,6 +6,7 @@
#include <optional>
#include <tuple>
#include <vector>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
@@ -16,34 +17,32 @@
namespace Vulkan {
-// TODO(Rodrigo): Fine tune this number
-constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
+namespace {
+
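+// Returns the smallest predefined chunk size that fits the required size; larger requests are
+// rounded up to a multiple of 256 MiB.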
+u64 GetAllocationChunkSize(u64 required_size) {
+ static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
+ auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
+ return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
+}
+
+} // Anonymous namespace
class VKMemoryAllocation final {
public:
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
- vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
- : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
- shifted_type{ShiftType(type)}, is_mappable{properties &
- vk::MemoryPropertyFlagBits::eHostVisible} {
- if (is_mappable) {
- const auto dev = device.GetLogical();
- const auto& dld = device.GetDispatchLoader();
- base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
- }
- }
+ vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
+ : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
+ shifted_type{ShiftType(type)} {}
~VKMemoryAllocation() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
- if (is_mappable)
- dev.unmapMemory(memory, dld);
dev.free(memory, nullptr, dld);
}
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
- auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
- static_cast<u64>(alignment));
+ auto found = TryFindFreeSection(free_iterator, allocation_size,
+ static_cast<u64>(commit_size), static_cast<u64>(alignment));
if (!found) {
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
@@ -52,8 +51,7 @@ public:
return nullptr;
}
}
- u8* address = is_mappable ? base_address + *found : nullptr;
- auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
+ auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
*found + commit_size);
commits.push_back(commit.get());
@@ -65,12 +63,10 @@ public:
void Free(const VKMemoryCommitImpl* commit) {
ASSERT(commit);
- const auto it =
- std::find_if(commits.begin(), commits.end(),
- [&](const auto& stored_commit) { return stored_commit == commit; });
+
+ const auto it = std::find(std::begin(commits), std::end(commits), commit);
if (it == commits.end()) {
- LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
- UNREACHABLE();
+ UNREACHABLE_MSG("Freeing unallocated commit!");
return;
}
commits.erase(it);
@@ -88,11 +84,11 @@ private:
}
    /// A memory allocator; it may return a free region between "start" and "end" with the requested
- /// requeriments.
+ /// requirements.
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
- u64 iterator = start;
- while (iterator + size < end) {
- const u64 try_left = Common::AlignUp(iterator, alignment);
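+        // First-fit search: test aligned candidate offsets and, on overlap, continue past the end
+        // of the conflicting commit.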
+ u64 iterator = Common::AlignUp(start, alignment);
+ while (iterator + size <= end) {
+ const u64 try_left = iterator;
const u64 try_right = try_left + size;
bool overlap = false;
@@ -100,7 +96,7 @@ private:
const auto [commit_left, commit_right] = commit->interval;
if (try_left < commit_right && commit_left < try_right) {
// There's an overlap, continue the search where the overlapping commit ends.
- iterator = commit_right;
+ iterator = Common::AlignUp(commit_right, alignment);
overlap = true;
break;
}
@@ -110,6 +106,7 @@ private:
return try_left;
}
}
+
        // No free regions were found, return an empty optional.
return std::nullopt;
}
@@ -117,12 +114,8 @@ private:
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
- const u64 alloc_size; ///< Size of this allocation.
+ const u64 allocation_size; ///< Size of this allocation.
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
- const bool is_mappable; ///< Whether the allocation is mappable.
-
- /// Base address of the mapped pointer.
- u8* base_address{};
/// Hints where the next free region is likely going to be.
u64 free_iterator{};
@@ -132,13 +125,15 @@ private:
};
VKMemoryManager::VKMemoryManager(const VKDevice& device)
- : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
- is_memory_unified{GetMemoryUnified(props)} {}
+ : device{device}, properties{device.GetPhysical().getMemoryProperties(
+ device.GetDispatchLoader())},
+ is_memory_unified{GetMemoryUnified(properties)} {}
VKMemoryManager::~VKMemoryManager() = default;
-VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
- ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
+VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
+ bool host_visible) {
+ const u64 chunk_size = GetAllocationChunkSize(requirements.size);
// When a host visible commit is asked, search for host visible and coherent, otherwise search
// for a fast device local type.
@@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
: vk::MemoryPropertyFlagBits::eDeviceLocal;
- const auto TryCommit = [&]() -> VKMemoryCommit {
- for (auto& alloc : allocs) {
- if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
- continue;
-
- if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
- return commit;
- }
- }
- return {};
- };
-
- if (auto commit = TryCommit(); commit) {
+ if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
return commit;
}
// Commit has failed, allocate more memory.
- if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
- // TODO(Rodrigo): Try to use host memory.
- LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
- UNREACHABLE();
+ if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
+        // TODO(Rodrigo): Handle these situations in some way, e.g. by flushing to guest memory.
+ // Allocation has failed, panic.
+ UNREACHABLE_MSG("Ran out of VRAM!");
+ return {};
}
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
// there's a bug.
- auto commit = TryCommit();
+ auto commit = TryAllocCommit(requirements, wanted_properties);
ASSERT(commit);
return commit;
}
@@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
- const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
- auto commit = Commit(requeriments, host_visible);
+ auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
@@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
- const auto requeriments = dev.getImageMemoryRequirements(image, dld);
- auto commit = Commit(requeriments, host_visible);
+ auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
u64 size) {
- const u32 type = [&]() {
- for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
- const auto flags = props.memoryTypes[type_index].propertyFlags;
+ const u32 type = [&] {
+ for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
+ const auto flags = properties.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
// The type matches in type and in the wanted properties.
return type_index;
}
}
- LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
- UNREACHABLE();
- return 0u;
+ UNREACHABLE_MSG("Couldn't find a compatible memory type!");
+ return 0U;
}();
const auto dev = device.GetLogical();
@@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
// Try to allocate found type.
const vk::MemoryAllocateInfo memory_ai(size, type);
vk::DeviceMemory memory;
- if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
+ if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
res != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
return false;
}
- allocs.push_back(
+ allocations.push_back(
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
return true;
}
-/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
- for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
- if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
+ vk::MemoryPropertyFlags wanted_properties) {
+ for (auto& allocation : allocations) {
+ if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
+ continue;
+ }
+ if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
+ return commit;
+ }
+ }
+ return {};
+}
+
+/*static*/ bool VKMemoryManager::GetMemoryUnified(
+ const vk::PhysicalDeviceMemoryProperties& properties) {
+ for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
+ if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
// Memory is considered unified when heaps are device local only.
return false;
}
@@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
return true;
}
-VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
- u8* data, u64 begin, u64 end)
- : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
+VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
+ vk::DeviceMemory memory, u64 begin, u64 end)
+ : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
}
-u8* VKMemoryCommitImpl::GetData() const {
- ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
- return data;
+MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
+ const auto dev = device.GetLogical();
+ const auto address = reinterpret_cast<u8*>(
+ dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
+ return MemoryMap{this, address};
+}
+
+void VKMemoryCommitImpl::Unmap() const {
+ const auto dev = device.GetLogical();
+ dev.unmapMemory(memory, device.GetDispatchLoader());
+}
+
+MemoryMap VKMemoryCommitImpl::Map() const {
+ return Map(interval.second - interval.first);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index 073597b35..cd00bb91b 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -12,6 +12,7 @@
namespace Vulkan {
+class MemoryMap;
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
@@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
explicit VKMemoryManager(const VKDevice& device);
+ VKMemoryManager(const VKMemoryManager&) = delete;
~VKMemoryManager();
/**
     * Commits memory with the specified requirements.
- * @param reqs Requeriments returned from a Vulkan call.
+ * @param requirements Requirements returned from a Vulkan call.
* @param host_visible Signals the allocator that it *must* use host visible and coherent
- * memory. When passing false, it will try to allocate device local memory.
+ * memory. When passing false, it will try to allocate device local memory.
* @returns A memory commit.
*/
VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
@@ -47,25 +49,35 @@ private:
/// Allocates a chunk of memory.
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
+ /// Tries to allocate a memory commit.
+ VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
+ vk::MemoryPropertyFlags wanted_properties);
+
    /// Returns true if the device uses a unified memory model.
- static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
+ static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
- const VKDevice& device; ///< Device handler.
- const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
- const bool is_memory_unified; ///< True if memory model is unified.
- std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
+ const VKDevice& device; ///< Device handler.
+ const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
+ const bool is_memory_unified; ///< True if memory model is unified.
+ std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
};
class VKMemoryCommitImpl final {
friend VKMemoryAllocation;
+ friend MemoryMap;
public:
- explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
- u64 begin, u64 end);
+ explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
+ vk::DeviceMemory memory, u64 begin, u64 end);
~VKMemoryCommitImpl();
- /// Returns the writeable memory map. The commit has to be mappable.
- u8* GetData() const;
+ /// Maps a memory region and returns a pointer to it.
+ /// It's illegal to have more than one memory map at the same time.
+ MemoryMap Map(u64 size, u64 offset = 0) const;
+
+ /// Maps the whole commit and returns a pointer to it.
+ /// It's illegal to have more than one memory map at the same time.
+ MemoryMap Map() const;
/// Returns the Vulkan memory handler.
vk::DeviceMemory GetMemory() const {
@@ -78,10 +90,46 @@ public:
}
private:
+ /// Unmaps memory.
+ void Unmap() const;
+
+ const VKDevice& device; ///< Vulkan device.
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
vk::DeviceMemory memory; ///< Vulkan device memory handler.
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
- u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
+};
+
+/// Holds ownership of a memory map.
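+/// Illustrative usage (not part of the patch); 'data' and 'size' are placeholders:
+///   MemoryMap map = commit->Map();
+///   std::memcpy(map.GetAddress(), data, size); // Unmapped when 'map' goes out of scope.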
+class MemoryMap final {
+public:
+ explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
+ : commit{commit}, address{address} {}
+
+ ~MemoryMap() {
+ if (commit) {
+ commit->Unmap();
+ }
+ }
+
+ /// Prematurely releases the memory map.
+ void Release() {
+ commit->Unmap();
+ commit = nullptr;
+ }
+
+ /// Returns the address of the memory map.
+ u8* GetAddress() const {
+ return address;
+ }
+
+    /// Returns the address of the memory map.
+ operator u8*() const {
+ return address;
+ }
+
+private:
+ const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
+ u8* address{}; ///< Address to the mapped memory.
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
new file mode 100644
index 000000000..48e23d4cd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -0,0 +1,395 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "common/microprofile.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/shader/compiler_settings.h"
+
+namespace Vulkan {
+
+MICROPROFILE_DECLARE(Vulkan_PipelineCache);
+
+using Tegra::Engines::ShaderType;
+
+namespace {
+
+constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
+ VideoCommon::Shader::CompileDepth::FullDecompile};
+
+/// Gets the address for the specified shader stage program
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+ const auto& gpu{system.GPU().Maxwell3D()};
+ const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+ return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+}
+
+/// Returns whether the current instruction offset is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+ // Sched instructions appear once every 4 instructions.
+ constexpr std::size_t SchedPeriod = 4;
+ const std::size_t absolute_offset = offset - main_offset;
+ return (absolute_offset % SchedPeriod) == 0;
+}
+
+/// Calculates the size of a program stream
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
+ const std::size_t start_offset = is_compute ? 0 : 10;
+ // This is the encoded version of BRA that jumps to itself. All Nvidia
+ // shaders end with one.
+ constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+ constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
+ std::size_t offset = start_offset;
+ while (offset < program.size()) {
+ const u64 instruction = program[offset];
+ if (!IsSchedInstruction(offset, start_offset)) {
+ if ((instruction & mask) == self_jumping_branch) {
+ // End on Maxwell's "nop" instruction
+ break;
+ }
+ if (instruction == 0) {
+ break;
+ }
+ }
+ ++offset;
+ }
+ // The last instruction is included in the program size
+ return std::min(offset + 1, program.size());
+}
+
+/// Gets the shader program code from memory for the specified address
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+ const u8* host_ptr, bool is_compute) {
+ ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+ ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+ std::fill(program_code.begin(), program_code.end(), 0);
+ return program_code;
+ });
+ memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
+ program_code.size() * sizeof(u64));
+ program_code.resize(CalculateProgramSize(program_code, is_compute));
+ return program_code;
+}
+
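+// VertexA and VertexB (program indices 0 and 1) both map to the vertex stage; later programs
+// map to program - 1.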
+constexpr std::size_t GetStageFromProgram(std::size_t program) {
+ return program == 0 ? 0 : program - 1;
+}
+
+constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
+ return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
+}
+
+ShaderType GetShaderType(Maxwell::ShaderProgram program) {
+ switch (program) {
+ case Maxwell::ShaderProgram::VertexB:
+ return ShaderType::Vertex;
+ case Maxwell::ShaderProgram::TesselationControl:
+ return ShaderType::TesselationControl;
+ case Maxwell::ShaderProgram::TesselationEval:
+ return ShaderType::TesselationEval;
+ case Maxwell::ShaderProgram::Geometry:
+ return ShaderType::Geometry;
+ case Maxwell::ShaderProgram::Fragment:
+ return ShaderType::Fragment;
+ default:
+ UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
+ return ShaderType::Vertex;
+ }
+}
+
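+// Appends one descriptor set layout binding per shader resource, in the same order used by
+// FillDescriptorUpdateTemplateEntries.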
+u32 FillDescriptorLayout(const ShaderEntries& entries,
+ std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+ Maxwell::ShaderProgram program_type, u32 base_binding) {
+ const ShaderType stage = GetStageFromProgram(program_type);
+ const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
+
+ u32 binding = base_binding;
+ const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
+ for (std::size_t i = 0; i < num_entries; ++i) {
+ bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
+ }
+ };
+ AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
+ AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
+ AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
+ AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
+ AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
+ return binding;
+}
+
+} // Anonymous namespace
+
+CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
+ GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
+ ProgramCode program_code, u32 main_offset)
+ : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
+ program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
+ shader_ir{this->program_code, main_offset, compiler_settings, locker},
+ entries{GenerateShaderEntries(shader_ir)} {}
+
+CachedShader::~CachedShader() = default;
+
+Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
+ Core::System& system, Tegra::Engines::ShaderType stage) {
+ if (stage == Tegra::Engines::ShaderType::Compute) {
+ return system.GPU().KeplerCompute();
+ } else {
+ return system.GPU().Maxwell3D();
+ }
+}
+
+VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
+ const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue)
+ : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
+ descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
+ renderpass_cache(device) {}
+
+VKPipelineCache::~VKPipelineCache() = default;
+
+std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+ const auto& gpu = system.GPU().Maxwell3D();
+ auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
+ if (!dirty) {
+ return last_shaders;
+ }
+ dirty = false;
+
+ std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+ for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ const auto& shader_config = gpu.regs.shader_config[index];
+ const auto program{static_cast<Maxwell::ShaderProgram>(index)};
+
+ // Skip stages that are not enabled
+ if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ continue;
+ }
+
+ auto& memory_manager{system.GPU().MemoryManager()};
+ const GPUVAddr program_addr{GetShaderAddress(system, program)};
+ const auto host_ptr{memory_manager.GetPointer(program_addr)};
+ auto shader = TryGet(host_ptr);
+ if (!shader) {
+ // No shader found - create a new one
+ constexpr u32 stage_offset = 10;
+ const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
+ auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+
+ const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ ASSERT(cpu_addr);
+
+ shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
+ host_ptr, std::move(code), stage_offset);
+ Register(shader);
+ }
+ shaders[index] = std::move(shader);
+ }
+ return last_shaders = shaders;
+}
+
+VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+ MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+ if (last_graphics_pipeline && last_graphics_key == key) {
+ return *last_graphics_pipeline;
+ }
+ last_graphics_key = key;
+
+ const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+ auto& entry = pair->second;
+ if (is_cache_miss) {
+ LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+ const auto [program, bindings] = DecompileShaders(key);
+ entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
+ update_descriptor_queue, renderpass_cache, key,
+ bindings, program);
+ }
+ return *(last_graphics_pipeline = entry.get());
+}
+
+VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
+ MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+ const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
+ auto& entry = pair->second;
+ if (!is_cache_miss) {
+ return *entry;
+ }
+ LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+
+ auto& memory_manager = system.GPU().MemoryManager();
+ const auto program_addr = key.shader;
+ const auto host_ptr = memory_manager.GetPointer(program_addr);
+
+ auto shader = TryGet(host_ptr);
+ if (!shader) {
+ // No shader found - create a new one
+ const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ ASSERT(cpu_addr);
+
+ auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+ constexpr u32 kernel_main_offset = 0;
+ shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
+ program_addr, *cpu_addr, host_ptr, std::move(code),
+ kernel_main_offset);
+ Register(shader);
+ }
+
+ Specialization specialization;
+ specialization.workgroup_size = key.workgroup_size;
+ specialization.shared_memory_size = key.shared_memory_size;
+
+ const SPIRVShader spirv_shader{
+ Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
+ shader->GetEntries()};
+ entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
+ update_descriptor_queue, spirv_shader);
+ return *entry;
+}
+
+void VKPipelineCache::Unregister(const Shader& shader) {
+ bool finished = false;
+ const auto Finish = [&] {
+ // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
+ // flush.
+ if (finished) {
+ return;
+ }
+ finished = true;
+ scheduler.Finish();
+ };
+
+ const GPUVAddr invalidated_addr = shader->GetGpuAddr();
+ for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
+ auto& entry = it->first;
+ if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
+ entry.shaders.end()) {
+ ++it;
+ continue;
+ }
+ Finish();
+ it = graphics_cache.erase(it);
+ }
+ for (auto it = compute_cache.begin(); it != compute_cache.end();) {
+ auto& entry = it->first;
+ if (entry.shader != invalidated_addr) {
+ ++it;
+ continue;
+ }
+ Finish();
+ it = compute_cache.erase(it);
+ }
+
+ RasterizerCache::Unregister(shader);
+}
+
+std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
+VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
+ const auto& fixed_state = key.fixed_state;
+ auto& memory_manager = system.GPU().MemoryManager();
+ const auto& gpu = system.GPU().Maxwell3D();
+
+ Specialization specialization;
+ specialization.primitive_topology = fixed_state.input_assembly.topology;
+ if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
+ ASSERT(fixed_state.input_assembly.point_size != 0.0f);
+ specialization.point_size = fixed_state.input_assembly.point_size;
+ }
+ for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
+ specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
+ }
+ specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
+ specialization.tessellation.primitive = fixed_state.tessellation.primitive;
+ specialization.tessellation.spacing = fixed_state.tessellation.spacing;
+ specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
+ for (const auto& rt : key.renderpass_params.color_attachments) {
+ specialization.enabled_rendertargets.set(rt.index);
+ }
+
+ SPIRVProgram program;
+ std::vector<vk::DescriptorSetLayoutBinding> bindings;
+
+ for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
+
+ // Skip stages that are not enabled
+ if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ continue;
+ }
+
+ const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
+ const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+ const auto shader = TryGet(host_ptr);
+ ASSERT(shader);
+
+ const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
+ const auto program_type = GetShaderType(program_enum);
+ const auto& entries = shader->GetEntries();
+ program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
+ entries};
+
+ if (program_enum == Maxwell::ShaderProgram::VertexA) {
+ // VertexB was combined with VertexA, so we skip the VertexB iteration
+ ++index;
+ }
+
+ const u32 old_binding = specialization.base_binding;
+ specialization.base_binding =
+ FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
+ ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
+ }
+ return {std::move(program), std::move(bindings)};
+}
+
+void FillDescriptorUpdateTemplateEntries(
+ const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
+ std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
+ static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
+ const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
+ const u32 count = static_cast<u32>(count_);
+ if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
+ device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
+ // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
+ // crash.
+ for (u32 i = 0; i < count; ++i) {
+ template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
+ offset + i * entry_size, entry_size);
+ }
+ } else if (count != 0) {
+ template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
+ }
+ offset += count * entry_size;
+ binding += count;
+ };
+
+ AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
+ AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
+ AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
+ AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
+ AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
new file mode 100644
index 000000000..8678fc9c3
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -0,0 +1,200 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <boost/functional/hash.hpp>
+
+#include "common/common_types.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/shader_ir.h"
+#include "video_core/surface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Vulkan {
+
+class RasterizerVulkan;
+class VKComputePipeline;
+class VKDescriptorPool;
+class VKDevice;
+class VKFence;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
+class CachedShader;
+using Shader = std::shared_ptr<CachedShader>;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+using ProgramCode = std::vector<u64>;
+
+struct GraphicsPipelineCacheKey {
+ FixedPipelineState fixed_state;
+ std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+ RenderPassParams renderpass_params;
+
+ std::size_t Hash() const noexcept {
+ std::size_t hash = fixed_state.Hash();
+ for (const auto& shader : shaders) {
+ boost::hash_combine(hash, shader);
+ }
+ boost::hash_combine(hash, renderpass_params.Hash());
+ return hash;
+ }
+
+ bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
+ return std::tie(fixed_state, shaders, renderpass_params) ==
+ std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
+ }
+};
+
+struct ComputePipelineCacheKey {
+ GPUVAddr shader{};
+ u32 shared_memory_size{};
+ std::array<u32, 3> workgroup_size{};
+
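+    // Folds the shader address, shared memory size and workgroup dimensions into a single value;
+    // exact matching is still done through operator==.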
+ std::size_t Hash() const noexcept {
+ return static_cast<std::size_t>(shader) ^
+ ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
+ static_cast<std::size_t>(workgroup_size[0]) ^
+ (static_cast<std::size_t>(workgroup_size[1]) << 16) ^
+ (static_cast<std::size_t>(workgroup_size[2]) << 24);
+ }
+
+ bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+ return std::tie(shader, shared_memory_size, workgroup_size) ==
+ std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
+ }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::GraphicsPipelineCacheKey> {
+ std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+
+template <>
+struct hash<Vulkan::ComputePipelineCacheKey> {
+ std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class CachedShader final : public RasterizerCacheObject {
+public:
+ explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
+ ~CachedShader();
+
+ GPUVAddr GetGpuAddr() const {
+ return gpu_addr;
+ }
+
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
+ }
+
+ std::size_t GetSizeInBytes() const override {
+ return program_code.size() * sizeof(u64);
+ }
+
+ VideoCommon::Shader::ShaderIR& GetIR() {
+ return shader_ir;
+ }
+
+ const VideoCommon::Shader::ShaderIR& GetIR() const {
+ return shader_ir;
+ }
+
+ const ShaderEntries& GetEntries() const {
+ return entries;
+ }
+
+private:
+ static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
+ Tegra::Engines::ShaderType stage);
+
+ GPUVAddr gpu_addr{};
+ VAddr cpu_addr{};
+ ProgramCode program_code;
+ VideoCommon::Shader::ConstBufferLocker locker;
+ VideoCommon::Shader::ShaderIR shader_ir;
+ ShaderEntries entries;
+};
+
+class VKPipelineCache final : public RasterizerCache<Shader> {
+public:
+ explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
+ const VKDevice& device, VKScheduler& scheduler,
+ VKDescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue);
+ ~VKPipelineCache();
+
+ std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+
+ VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+
+ VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+
+protected:
+ void Unregister(const Shader& shader) override;
+
+ void FlushObjectInner(const Shader& object) override {}
+
+private:
+ std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
+ const GraphicsPipelineCacheKey& key);
+
+ Core::System& system;
+ const VKDevice& device;
+ VKScheduler& scheduler;
+ VKDescriptorPool& descriptor_pool;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+
+ VKRenderPassCache renderpass_cache;
+
+ std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+
+ GraphicsPipelineCacheKey last_graphics_key;
+ VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+
+ std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
+ graphics_cache;
+ std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
+};
+
+void FillDescriptorUpdateTemplateEntries(
+ const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
+ std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
new file mode 100644
index 000000000..d2c6b1189
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -0,0 +1,1141 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <mutex>
+#include <vector>
+
+#include <boost/container/static_vector.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/microprofile.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_sampler_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Vulkan {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
+MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
+
+namespace {
+
+constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute);
+
+vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
+ const auto& viewport = regs.viewport_transform[index];
+ const float x = viewport.translate_x - viewport.scale_x;
+ const float y = viewport.translate_y - viewport.scale_y;
+ const float width = viewport.scale_x * 2.0f;
+ const float height = viewport.scale_y * 2.0f;
+
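+    // reduce_z shifts the near plane when the guest uses a [-1, 1] depth range so that it maps
+    // onto Vulkan's [0, 1] range.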
+ const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
+ float near = viewport.translate_z - viewport.scale_z * reduce_z;
+ float far = viewport.translate_z + viewport.scale_z;
+ if (!device.IsExtDepthRangeUnrestrictedSupported()) {
+ near = std::clamp(near, 0.0f, 1.0f);
+ far = std::clamp(far, 0.0f, 1.0f);
+ }
+
+ return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far);
+}
+
+constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) {
+ const auto& scissor = regs.scissor_test[index];
+ if (!scissor.enable) {
+ return {{0, 0}, {INT32_MAX, INT32_MAX}};
+ }
+ const u32 width = scissor.max_x - scissor.min_x;
+ const u32 height = scissor.max_y - scissor.min_y;
+ return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}};
+}
+
+std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
+ const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
+ for (std::size_t i = 0; i < std::size(addresses); ++i) {
+ addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
+ }
+ return addresses;
+}
+
+void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage,
+ vk::AccessFlags access) {
+ for (auto& [view, layout] : views) {
+ view->Transition(*layout, pipeline_stage, access);
+ }
+}
+
+template <typename Engine, typename Entry>
+Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
+ std::size_t stage) {
+ const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
+ if (entry.IsBindless()) {
+ const Tegra::Texture::TextureHandle tex_handle =
+ engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset());
+ return engine.GetTextureInfo(tex_handle);
+ }
+ if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
+ return engine.GetStageTexture(stage_type, entry.GetOffset());
+ } else {
+ return engine.GetTexture(entry.GetOffset());
+ }
+}
+
+} // Anonymous namespace
+
+class BufferBindings final {
+public:
+ void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) {
+ vertex.buffer_ptrs[vertex.num_buffers] = buffer;
+ vertex.offsets[vertex.num_buffers] = offset;
+ ++vertex.num_buffers;
+ }
+
+ void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) {
+ index.buffer = buffer;
+ index.offset = offset;
+ index.type = type;
+ }
+
+ void Bind(VKScheduler& scheduler) const {
+        // Use this large switch case to avoid dispatching more memory in the record lambda than
+        // we need. It looks horrible, but it's the best we can do in standard C++.
+ switch (vertex.num_buffers) {
+ case 0:
+ return BindStatic<0>(scheduler);
+ case 1:
+ return BindStatic<1>(scheduler);
+ case 2:
+ return BindStatic<2>(scheduler);
+ case 3:
+ return BindStatic<3>(scheduler);
+ case 4:
+ return BindStatic<4>(scheduler);
+ case 5:
+ return BindStatic<5>(scheduler);
+ case 6:
+ return BindStatic<6>(scheduler);
+ case 7:
+ return BindStatic<7>(scheduler);
+ case 8:
+ return BindStatic<8>(scheduler);
+ case 9:
+ return BindStatic<9>(scheduler);
+ case 10:
+ return BindStatic<10>(scheduler);
+ case 11:
+ return BindStatic<11>(scheduler);
+ case 12:
+ return BindStatic<12>(scheduler);
+ case 13:
+ return BindStatic<13>(scheduler);
+ case 14:
+ return BindStatic<14>(scheduler);
+ case 15:
+ return BindStatic<15>(scheduler);
+ case 16:
+ return BindStatic<16>(scheduler);
+ case 17:
+ return BindStatic<17>(scheduler);
+ case 18:
+ return BindStatic<18>(scheduler);
+ case 19:
+ return BindStatic<19>(scheduler);
+ case 20:
+ return BindStatic<20>(scheduler);
+ case 21:
+ return BindStatic<21>(scheduler);
+ case 22:
+ return BindStatic<22>(scheduler);
+ case 23:
+ return BindStatic<23>(scheduler);
+ case 24:
+ return BindStatic<24>(scheduler);
+ case 25:
+ return BindStatic<25>(scheduler);
+ case 26:
+ return BindStatic<26>(scheduler);
+ case 27:
+ return BindStatic<27>(scheduler);
+ case 28:
+ return BindStatic<28>(scheduler);
+ case 29:
+ return BindStatic<29>(scheduler);
+ case 30:
+ return BindStatic<30>(scheduler);
+ case 31:
+ return BindStatic<31>(scheduler);
+ case 32:
+ return BindStatic<32>(scheduler);
+ }
+ UNREACHABLE();
+ }
+
+private:
+ // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
+ struct {
+ std::size_t num_buffers = 0;
+ std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs;
+ std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets;
+ } vertex;
+
+ struct {
+ const vk::Buffer* buffer = nullptr;
+ vk::DeviceSize offset;
+ vk::IndexType type;
+ } index;
+
+ template <std::size_t N>
+ void BindStatic(VKScheduler& scheduler) const {
+ if (index.buffer != nullptr) {
+ BindStatic<N, true>(scheduler);
+ } else {
+ BindStatic<N, false>(scheduler);
+ }
+ }
+
+ template <std::size_t N, bool is_indexed>
+ void BindStatic(VKScheduler& scheduler) const {
+ static_assert(N <= Maxwell::NumVertexArrays);
+ if constexpr (N == 0) {
+ return;
+ }
+
+ std::array<vk::Buffer, N> buffers;
+ std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(),
+ [](const auto ptr) { return *ptr; });
+
+ std::array<vk::DeviceSize, N> offsets;
+ std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
+
+ if constexpr (is_indexed) {
+ // Indexed draw
+ scheduler.Record([buffers, offsets, index_buffer = *index.buffer,
+ index_offset = index.offset,
+ index_type = index.type](auto cmdbuf, auto& dld) {
+ cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld);
+ cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(),
+ dld);
+ });
+ } else {
+ // Array draw
+ scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) {
+ cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(),
+ dld);
+ });
+ }
+ }
+};
+
+void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf,
+ const vk::DispatchLoaderDynamic& dld) const {
+ if (is_indexed) {
+ cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld);
+ } else {
+ cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld);
+ }
+}
+
+RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
+ VKScreenInfo& screen_info, const VKDevice& device,
+ VKResourceManager& resource_manager,
+ VKMemoryManager& memory_manager, VKScheduler& scheduler)
+ : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
+ screen_info{screen_info}, device{device}, resource_manager{resource_manager},
+ memory_manager{memory_manager}, scheduler{scheduler},
+ staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
+ update_descriptor_queue(device, scheduler),
+ quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
+ uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
+ texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
+ staging_pool),
+ pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
+ buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
+ sampler_cache(device) {}
+
+RasterizerVulkan::~RasterizerVulkan() = default;
+
+bool RasterizerVulkan::DrawBatch(bool is_indexed) {
+ Draw(is_indexed, false);
+ return true;
+}
+
+bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) {
+ Draw(is_indexed, true);
+ return true;
+}
+
+void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
+ MICROPROFILE_SCOPE(Vulkan_Drawing);
+
+ FlushWork();
+
+ const auto& gpu = system.GPU().Maxwell3D();
+ GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
+
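+    // Map the stream buffer once with a worst-case size so that every upload of this draw fits.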
+ buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
+
+ BufferBindings buffer_bindings;
+ const DrawParameters draw_params =
+ SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
+
+ update_descriptor_queue.Acquire();
+ sampled_views.clear();
+ image_views.clear();
+
+ const auto shaders = pipeline_cache.GetShaders();
+ key.shaders = GetShaderAddresses(shaders);
+ SetupShaderDescriptors(shaders);
+
+ buffer_cache.Unmap();
+
+ const auto texceptions = UpdateAttachments();
+ SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
+
+ key.renderpass_params = GetRenderPassParams(texceptions);
+
+ auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
+ scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+
+ const auto renderpass = pipeline.GetRenderPass();
+ const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
+ scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
+
+ UpdateDynamicStates();
+
+ buffer_bindings.Bind(scheduler);
+
+ if (device.IsNvDeviceDiagnosticCheckpoints()) {
+ scheduler.Record(
+ [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
+ }
+
+ const auto pipeline_layout = pipeline.GetLayout();
+ const auto descriptor_set = pipeline.CommitDescriptorSet();
+ scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
+ if (descriptor_set) {
+ cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout,
+ DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld);
+ }
+ draw_params.Draw(cmdbuf, dld);
+ });
+}
+
+void RasterizerVulkan::Clear() {
+ MICROPROFILE_SCOPE(Vulkan_Clearing);
+
+ const auto& gpu = system.GPU().Maxwell3D();
+ if (!system.GPU().Maxwell3D().ShouldExecute()) {
+ return;
+ }
+
+ const auto& regs = gpu.regs;
+ const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
+ regs.clear_buffers.A;
+ const bool use_depth = regs.clear_buffers.Z;
+ const bool use_stencil = regs.clear_buffers.S;
+ if (!use_color && !use_depth && !use_stencil) {
+ return;
+ }
+    // Clearing images must be done outside of a renderpass
+ scheduler.RequestOutsideRenderPassOperationContext();
+
+    // TODO(Rodrigo): Implement clears by rendering a quad or by beginning a renderpass with
+    // clear operations.
+
+ if (use_color) {
+ View color_view;
+ {
+ MICROPROFILE_SCOPE(Vulkan_RenderTargets);
+ color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
+ }
+
+ color_view->Transition(vk::ImageLayout::eTransferDstOptimal,
+ vk::PipelineStageFlagBits::eTransfer,
+ vk::AccessFlagBits::eTransferWrite);
+
+ const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
+ regs.clear_color[2], regs.clear_color[3]};
+ const vk::ClearColorValue clear(clear_color);
+ scheduler.Record([image = color_view->GetImage(),
+ subresource = color_view->GetImageSubresourceRange(),
+ clear](auto cmdbuf, auto& dld) {
+ cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource,
+ dld);
+ });
+ }
+ if (use_depth || use_stencil) {
+ View zeta_surface;
+ {
+ MICROPROFILE_SCOPE(Vulkan_RenderTargets);
+ zeta_surface = texture_cache.GetDepthBufferSurface(false);
+ }
+
+ zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal,
+ vk::PipelineStageFlagBits::eTransfer,
+ vk::AccessFlagBits::eTransferWrite);
+
+ const vk::ClearDepthStencilValue clear(regs.clear_depth,
+ static_cast<u32>(regs.clear_stencil));
+ scheduler.Record([image = zeta_surface->GetImage(),
+ subresource = zeta_surface->GetImageSubresourceRange(),
+ clear](auto cmdbuf, auto& dld) {
+ cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
+ subresource, dld);
+ });
+ }
+}
+
+void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
+ MICROPROFILE_SCOPE(Vulkan_Compute);
+ update_descriptor_queue.Acquire();
+ sampled_views.clear();
+ image_views.clear();
+
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const ComputePipelineCacheKey key{
+ code_addr,
+ launch_desc.shared_alloc,
+ {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
+ auto& pipeline = pipeline_cache.GetComputePipeline(key);
+
+ // Compute dispatches can't be executed inside a renderpass
+ scheduler.RequestOutsideRenderPassOperationContext();
+
+ buffer_cache.Map(CalculateComputeStreamBufferSize());
+
+ const auto& entries = pipeline.GetEntries();
+ SetupComputeConstBuffers(entries);
+ SetupComputeGlobalBuffers(entries);
+ SetupComputeTexelBuffers(entries);
+ SetupComputeTextures(entries);
+ SetupComputeImages(entries);
+
+ buffer_cache.Unmap();
+
+ TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader,
+ vk::AccessFlagBits::eShaderRead);
+ TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader,
+ vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
+
+ if (device.IsNvDeviceDiagnosticCheckpoints()) {
+ scheduler.Record(
+            [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
+ }
+
+ scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
+ grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
+ layout = pipeline.GetLayout(),
+ descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) {
+ cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld);
+ cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1,
+ &descriptor_set, 0, nullptr, dld);
+ cmdbuf.dispatch(grid_x, grid_y, grid_z, dld);
+ });
+}
+
+void RasterizerVulkan::FlushAll() {}
+
+void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
+ texture_cache.FlushRegion(addr, size);
+ buffer_cache.FlushRegion(addr, size);
+}
+
+void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
+ texture_cache.InvalidateRegion(addr, size);
+ pipeline_cache.InvalidateRegion(addr, size);
+ buffer_cache.InvalidateRegion(addr, size);
+}
+
+void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+ FlushRegion(addr, size);
+ InvalidateRegion(addr, size);
+}
+
+void RasterizerVulkan::FlushCommands() {
+ if (draw_counter > 0) {
+ draw_counter = 0;
+ scheduler.Flush();
+ }
+}
+
+void RasterizerVulkan::TickFrame() {
+ draw_counter = 0;
+ update_descriptor_queue.TickFrame();
+ buffer_cache.TickFrame();
+ staging_pool.TickFrame();
+}
+
+bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
+ const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+ const Tegra::Engines::Fermi2D::Config& copy_config) {
+ texture_cache.DoFermiCopy(src, dst, copy_config);
+ return true;
+}
+
+bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
+ VAddr framebuffer_addr, u32 pixel_stride) {
+ if (!framebuffer_addr) {
+ return false;
+ }
+
+ const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
+ const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
+ if (!surface) {
+ return false;
+ }
+
+ // Verify that the cached surface is the same size and format as the requested framebuffer
+ const auto& params{surface->GetSurfaceParams()};
+ const auto& pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
+ ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
+    ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
+    ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel format is different");
+
+ screen_info.image = &surface->GetImage();
+ screen_info.width = params.width;
+ screen_info.height = params.height;
+ screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion;
+ return true;
+}
+
+void RasterizerVulkan::FlushWork() {
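+    // Number of draws to record before the accumulated work is flushed to the Vulkan driver.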
+ static constexpr u32 DRAWS_TO_DISPATCH = 4096;
+
+ // Only check multiples of 8 draws
+ static_assert(DRAWS_TO_DISPATCH % 8 == 0);
+ if ((++draw_counter & 7) != 7) {
+ return;
+ }
+
+ if (draw_counter < DRAWS_TO_DISPATCH) {
+ // Send recorded tasks to the worker thread
+ scheduler.DispatchWork();
+ return;
+ }
+
+    // Otherwise (every DRAWS_TO_DISPATCH draws) flush execution.
+    // This submits the recorded commands to the Vulkan driver.
+ scheduler.Flush();
+ draw_counter = 0;
+}
+
+RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
+ MICROPROFILE_SCOPE(Vulkan_RenderTargets);
+ auto& dirty = system.GPU().Maxwell3D().dirty;
+ const bool update_rendertargets = dirty.render_settings;
+ dirty.render_settings = false;
+
+ texture_cache.GuardRenderTargets(true);
+
+ Texceptions texceptions;
+ for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+ if (update_rendertargets) {
+ color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
+ }
+ if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
+ texceptions.set(rt);
+ }
+ }
+
+ if (update_rendertargets) {
+ zeta_attachment = texture_cache.GetDepthBufferSurface(true);
+ }
+ if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
+ texceptions.set(ZETA_TEXCEPTION_INDEX);
+ }
+
+ texture_cache.GuardRenderTargets(false);
+
+ return texceptions;
+}
+
+bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) {
+ bool overlap = false;
+ for (auto& [view, layout] : sampled_views) {
+ if (!attachment.IsSameSurface(*view)) {
+ continue;
+ }
+ overlap = true;
+ *layout = vk::ImageLayout::eGeneral;
+ }
+ return overlap;
+}
+
+std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(
+ vk::RenderPass renderpass) {
+ FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
+ std::numeric_limits<u32>::max()};
+
+ const auto MarkAsModifiedAndPush = [&](const View& view) {
+ if (view == nullptr) {
+ return false;
+ }
+ key.views.push_back(view->GetHandle());
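+        // Vulkan requires the framebuffer to be at most as large as any of its attachments, so
+        // clamp the dimensions against every pushed view.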
+ key.width = std::min(key.width, view->GetWidth());
+ key.height = std::min(key.height, view->GetHeight());
+ return true;
+ };
+
+ for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
+ if (MarkAsModifiedAndPush(color_attachments[index])) {
+ texture_cache.MarkColorBufferInUse(index);
+ }
+ }
+ if (MarkAsModifiedAndPush(zeta_attachment)) {
+ texture_cache.MarkDepthBufferInUse();
+ }
+
+ const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
+ auto& framebuffer = fbentry->second;
+ if (is_cache_miss) {
+ const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass,
+ static_cast<u32>(key.views.size()),
+ key.views.data(), key.width, key.height, 1);
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
+ }
+
+ return {*framebuffer, vk::Extent2D{key.width, key.height}};
+}
+
+RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
+ BufferBindings& buffer_bindings,
+ bool is_indexed,
+ bool is_instanced) {
+ MICROPROFILE_SCOPE(Vulkan_Geometry);
+
+ const auto& gpu = system.GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
+
+ SetupVertexArrays(fixed_state.vertex_input, buffer_bindings);
+
+ const u32 base_instance = regs.vb_base_instance;
+ const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
+ const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
+ const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
+
+ DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
+ SetupIndexBuffer(buffer_bindings, params, is_indexed);
+
+ return params;
+}
+
+void RasterizerVulkan::SetupShaderDescriptors(
+ const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ texture_cache.GuardSamplers(true);
+
+ for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+ // Skip VertexA stage
+ const auto& shader = shaders[stage + 1];
+ if (!shader) {
+ continue;
+ }
+ const auto& entries = shader->GetEntries();
+ SetupGraphicsConstBuffers(entries, stage);
+ SetupGraphicsGlobalBuffers(entries, stage);
+ SetupGraphicsTexelBuffers(entries, stage);
+ SetupGraphicsTextures(entries, stage);
+ SetupGraphicsImages(entries, stage);
+ }
+ texture_cache.GuardSamplers(false);
+}
+
+void RasterizerVulkan::SetupImageTransitions(
+ Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
+ const View& zeta_attachment) {
+ TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics,
+ vk::AccessFlagBits::eShaderRead);
+ TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics,
+ vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
+
+ for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) {
+ const auto color_attachment = color_attachments[rt];
+ if (color_attachment == nullptr) {
+ continue;
+ }
+ const auto image_layout =
+ texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal;
+ color_attachment->Transition(
+ image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput,
+ vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite);
+ }
+
+ if (zeta_attachment != nullptr) {
+ const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
+ ? vk::ImageLayout::eGeneral
+ : vk::ImageLayout::eDepthStencilAttachmentOptimal;
+ zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests,
+ vk::AccessFlagBits::eDepthStencilAttachmentRead |
+ vk::AccessFlagBits::eDepthStencilAttachmentWrite);
+ }
+}
+
+void RasterizerVulkan::UpdateDynamicStates() {
+ auto& gpu = system.GPU().Maxwell3D();
+ UpdateViewportsState(gpu);
+ UpdateScissorsState(gpu);
+ UpdateDepthBias(gpu);
+ UpdateBlendConstants(gpu);
+ UpdateDepthBounds(gpu);
+ UpdateStencilFaces(gpu);
+}
+
+void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
+ BufferBindings& buffer_bindings) {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+
+ for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) {
+ const auto& attrib = regs.vertex_attrib_format[index];
+ if (!attrib.IsValid()) {
+ continue;
+ }
+
+ const auto& buffer = regs.vertex_array[attrib.buffer];
+ ASSERT(buffer.IsEnabled());
+
+ vertex_input.attributes[vertex_input.num_attributes++] =
+ FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size,
+ attrib.offset);
+ }
+
+ for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) {
+ const auto& vertex_array = regs.vertex_array[index];
+ if (!vertex_array.IsEnabled()) {
+ continue;
+ }
+
+ const GPUVAddr start{vertex_array.StartAddress()};
+ const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
+
+ ASSERT(end > start);
+ const std::size_t size{end - start + 1};
+ const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
+
+ vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding(
+ index, vertex_array.stride,
+ regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
+ buffer_bindings.AddVertexBinding(buffer, offset);
+ }
+}
+
+void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
+ bool is_indexed) {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ switch (regs.draw.topology) {
+ case Maxwell::PrimitiveTopology::Quads:
+ if (params.is_indexed) {
+ UNIMPLEMENTED();
+ } else {
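+            // Quad primitives are not available in Vulkan. Assemble an index buffer with a
+            // compute pass that expands each quad into two triangles (six indices per four
+            // vertices).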
+ const auto [buffer, offset] =
+ quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
+ buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32);
+ params.base_vertex = 0;
+ params.num_vertices = params.num_vertices * 6 / 4;
+ params.is_indexed = true;
+ }
+ break;
+ default: {
+ if (!is_indexed) {
+ break;
+ }
+ const GPUVAddr gpu_addr = regs.index_array.IndexStart();
+ auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+
+ auto format = regs.index_array.format;
+ const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
+ if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
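+            // 8-bit indices require VK_EXT_index_type_uint8; when it is missing, promote them
+            // to 16 bits with a compute pass.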
+ std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset);
+ format = Maxwell::IndexFormat::UnsignedShort;
+ }
+
+ buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
+ break;
+ }
+ }
+}
+
+void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
+ MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
+ const auto& gpu = system.GPU().Maxwell3D();
+ const auto& shader_stage = gpu.state.shader_stages[stage];
+ for (const auto& entry : entries.const_buffers) {
+ SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
+ }
+}
+
+void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
+ MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
+ auto& gpu{system.GPU()};
+ const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
+
+ for (const auto& entry : entries.global_buffers) {
+ const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
+ SetupGlobalBuffer(entry, addr);
+ }
+}
+
+void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ const auto& gpu = system.GPU().Maxwell3D();
+ for (const auto& entry : entries.texel_buffers) {
+ const auto image = GetTextureInfo(gpu, entry, stage).tic;
+ SetupTexelBuffer(image, entry);
+ }
+}
+
+void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ const auto& gpu = system.GPU().Maxwell3D();
+ for (const auto& entry : entries.samplers) {
+ const auto texture = GetTextureInfo(gpu, entry, stage);
+ SetupTexture(texture, entry);
+ }
+}
+
+void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
+ MICROPROFILE_SCOPE(Vulkan_Images);
+    const auto& gpu = system.GPU().Maxwell3D();
+ for (const auto& entry : entries.images) {
+ const auto tic = GetTextureInfo(gpu, entry, stage).tic;
+ SetupImage(tic, entry);
+ }
+}
+
+void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
+ MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ for (const auto& entry : entries.const_buffers) {
+ const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
+ const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
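+        // Each bit of the enable mask marks whether the corresponding constant buffer is bound.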
+ Tegra::Engines::ConstBufferInfo buffer;
+ buffer.address = config.Address();
+ buffer.size = config.size;
+ buffer.enabled = mask[entry.GetIndex()];
+ SetupConstBuffer(entry, buffer);
+ }
+}
+
+void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
+ MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
+ const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
+ for (const auto& entry : entries.global_buffers) {
+ const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
+ SetupGlobalBuffer(entry, addr);
+ }
+}
+
+void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ const auto& gpu = system.GPU().KeplerCompute();
+ for (const auto& entry : entries.texel_buffers) {
+ const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ SetupTexelBuffer(image, entry);
+ }
+}
+
+void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ const auto& gpu = system.GPU().KeplerCompute();
+ for (const auto& entry : entries.samplers) {
+ const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex);
+ SetupTexture(texture, entry);
+ }
+}
+
+void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
+ MICROPROFILE_SCOPE(Vulkan_Images);
+ const auto& gpu = system.GPU().KeplerCompute();
+ for (const auto& entry : entries.images) {
+ const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ SetupImage(tic, entry);
+ }
+}
+
+void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
+ const Tegra::Engines::ConstBufferInfo& buffer) {
+ // Align the size to avoid bad std140 interactions
+ const std::size_t size =
+ Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
+ ASSERT(size <= MaxConstbufferSize);
+
+ const auto [buffer_handle, offset] =
+ buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
+
+ update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
+}
+
+void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
+ auto& memory_manager{system.GPU().MemoryManager()};
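+    // Global buffers are described in guest memory as a 64-bit address followed by a 32-bit size.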
+ const auto actual_addr = memory_manager.Read<u64>(address);
+ const auto size = memory_manager.Read<u32>(address + 8);
+
+ if (size == 0) {
+        // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because
+        // Vulkan doesn't allow binding zero-sized buffers.
+ constexpr std::size_t dummy_size = 4;
+ const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
+ update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
+ return;
+ }
+
+ const auto [buffer, offset] = buffer_cache.UploadMemory(
+ actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
+ update_descriptor_queue.AddBuffer(buffer, offset, size);
+}
+
+void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
+ const TexelBufferEntry& entry) {
+ const auto view = texture_cache.GetTextureSurface(tic, entry);
+ ASSERT(view->IsBufferView());
+
+ update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
+}
+
+void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture,
+ const SamplerEntry& entry) {
+ auto view = texture_cache.GetTextureSurface(texture.tic, entry);
+ ASSERT(!view->IsBufferView());
+
+ const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
+ texture.tic.z_source, texture.tic.w_source);
+ const auto sampler = sampler_cache.GetSampler(texture.tsc);
+ update_descriptor_queue.AddSampledImage(sampler, image_view);
+
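+    // Keep a pointer to the image layout written into the descriptor; it is rewritten to
+    // eGeneral later if the same surface is also bound as a render target (see
+    // WalkAttachmentOverlaps).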
+ const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal;
+ sampled_views.push_back(ImageView{std::move(view), image_layout});
+}
+
+void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
+ auto view = texture_cache.GetImageSurface(tic, entry);
+
+ if (entry.IsWritten()) {
+ view->MarkAsModified(texture_cache.Tick());
+ }
+
+ UNIMPLEMENTED_IF(tic.IsBuffer());
+
+ const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ update_descriptor_queue.AddImage(image_view);
+
+ const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ *image_layout = vk::ImageLayout::eGeneral;
+ image_views.push_back(ImageView{std::move(view), image_layout});
+}
+
+void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
+ if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) {
+ return;
+ }
+ gpu.dirty.viewport_transform = false;
+ const auto& regs = gpu.regs;
+ const std::array viewports{
+ GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
+ GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
+ GetViewportState(device, regs, 4), GetViewportState(device, regs, 5),
+ GetViewportState(device, regs, 6), GetViewportState(device, regs, 7),
+ GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
+ GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
+ GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
+ GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
+ scheduler.Record([viewports](auto cmdbuf, auto& dld) {
+ cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld);
+ });
+}
+
+void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
+ if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) {
+ return;
+ }
+ gpu.dirty.scissor_test = false;
+ const auto& regs = gpu.regs;
+ const std::array scissors = {
+ GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
+ GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
+ GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
+ GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
+ GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
+ GetScissorState(regs, 15)};
+ scheduler.Record([scissors](auto cmdbuf, auto& dld) {
+ cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) {
+ if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) {
+ return;
+ }
+ gpu.dirty.polygon_offset = false;
+ const auto& regs = gpu.regs;
+ scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
+ factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) {
+ cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld);
+ });
+}
+
+void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) {
+ if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) {
+ return;
+ }
+ gpu.dirty.blend_state = false;
+ const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g,
+ gpu.regs.blend_color.b, gpu.regs.blend_color.a};
+ scheduler.Record([blend_color](auto cmdbuf, auto& dld) {
+ cmdbuf.setBlendConstants(blend_color.data(), dld);
+ });
+}
+
+void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) {
+ if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) {
+ return;
+ }
+ gpu.dirty.depth_bounds_values = false;
+ const auto& regs = gpu.regs;
+ scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
+ auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); });
+}
+
+void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) {
+ if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) {
+ return;
+ }
+ gpu.dirty.stencil_test = false;
+ const auto& regs = gpu.regs;
+ if (regs.stencil_two_side_enable) {
+ // Separate values per face
+ scheduler.Record(
+ [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask,
+ front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref,
+ back_write_mask = regs.stencil_back_mask,
+ back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) {
+ // Front face
+ cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld);
+ cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld);
+ cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld);
+
+ // Back face
+ cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld);
+ cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld);
+ cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld);
+ });
+ } else {
+ // Front face defines both faces
+ scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask,
+ test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) {
+ cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld);
+ cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld);
+ cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld);
+ });
+ }
+}
+
+std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
+ std::size_t size = CalculateVertexArraysSize();
+ if (is_indexed) {
+ size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
+ }
+ size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
+ return size;
+}
+
+std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
+ return Tegra::Engines::KeplerCompute::NumConstBuffers *
+ (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+}
+
+std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+
+ std::size_t size = 0;
+ for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ // This implementation assumes that all attributes are used in the shader.
+ const GPUVAddr start{regs.vertex_array[index].StartAddress()};
+ const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
+ DEBUG_ASSERT(end > start);
+
+ size += (end - start + 1) * regs.vertex_array[index].enable;
+ }
+ return size;
+}
+
+std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ return static_cast<std::size_t>(regs.index_array.count) *
+ static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+}
+
+std::size_t RasterizerVulkan::CalculateConstBufferSize(
+ const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
+ if (entry.IsIndirect()) {
+ // Buffer is accessed indirectly, so upload the entire thing
+ return buffer.size;
+ } else {
+ // Buffer is accessed directly, upload just what we use
+ return entry.GetSize();
+ }
+}
+
+RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
+ using namespace VideoCore::Surface;
+
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ RenderPassParams renderpass_params;
+
+ for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
+ const auto& rendertarget = regs.rt[rt];
+ if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE)
+ continue;
+ renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
+ static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
+ texceptions.test(rt)});
+ }
+
+ renderpass_params.has_zeta = regs.zeta_enable;
+ if (renderpass_params.has_zeta) {
+ renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
+ renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
+ }
+
+ return renderpass_params;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
new file mode 100644
index 000000000..7be71e734
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -0,0 +1,263 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include <boost/container/static_vector.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_accelerated.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_sampler_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Core {
+class System;
+}
+
+namespace Core::Frontend {
+class EmuWindow;
+}
+
+namespace Tegra::Engines {
+class Maxwell3D;
+}
+
+namespace Vulkan {
+
+struct VKScreenInfo;
+
+using ImageViewsPack =
+ boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>;
+
+struct FramebufferCacheKey {
+ vk::RenderPass renderpass{};
+ u32 width = 0;
+ u32 height = 0;
+ ImageViewsPack views;
+
+ std::size_t Hash() const noexcept {
+ std::size_t hash = 0;
+ boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
+ for (const auto& view : views) {
+ boost::hash_combine(hash, static_cast<VkImageView>(view));
+ }
+ boost::hash_combine(hash, width);
+ boost::hash_combine(hash, height);
+ return hash;
+ }
+
+ bool operator==(const FramebufferCacheKey& rhs) const noexcept {
+ return std::tie(renderpass, views, width, height) ==
+ std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
+ }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::FramebufferCacheKey> {
+ std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class BufferBindings;
+
+struct ImageView {
+ View view;
+ vk::ImageLayout* layout = nullptr;
+};
+
+class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
+public:
+ explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
+ VKScreenInfo& screen_info, const VKDevice& device,
+ VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler);
+ ~RasterizerVulkan() override;
+
+ bool DrawBatch(bool is_indexed) override;
+ bool DrawMultiBatch(bool is_indexed) override;
+ void Clear() override;
+ void DispatchCompute(GPUVAddr code_addr) override;
+ void FlushAll() override;
+ void FlushRegion(CacheAddr addr, u64 size) override;
+ void InvalidateRegion(CacheAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+ void FlushCommands() override;
+ void TickFrame() override;
+ bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
+ const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+ const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
+ u32 pixel_stride) override;
+
+ /// Maximum supported size that a constbuffer can have in bytes.
+ static constexpr std::size_t MaxConstbufferSize = 0x10000;
+ static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
+                  "The maximum size of a constbuffer must be a multiple of the size of a vec4");
+
+private:
+ struct DrawParameters {
+ void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const;
+
+ u32 base_instance = 0;
+ u32 num_instances = 0;
+ u32 base_vertex = 0;
+ u32 num_vertices = 0;
+        bool is_indexed = false;
+ };
+
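+    /// Bitset of attachments that are also sampled as textures in the same draw ("texture
+    /// exceptions"). Such attachments are kept in the general image layout. The last bit
+    /// (ZETA_TEXCEPTION_INDEX) tracks the depth buffer.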
+ using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
+
+ static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
+
+ void Draw(bool is_indexed, bool is_instanced);
+
+ void FlushWork();
+
+ Texceptions UpdateAttachments();
+
+ std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass);
+
+    /// Sets up geometry buffers and state.
+ DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
+ bool is_indexed, bool is_instanced);
+
+    /// Sets up descriptors in the graphics pipeline.
+ void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+
+ void SetupImageTransitions(Texceptions texceptions,
+ const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
+ const View& zeta_attachment);
+
+ void UpdateDynamicStates();
+
+ bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
+
+ void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
+ BufferBindings& buffer_bindings);
+
+ void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
+
+    /// Sets up constant buffers in the graphics pipeline.
+ void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up global buffers in the graphics pipeline.
+ void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up texel buffers in the graphics pipeline.
+ void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up textures in the graphics pipeline.
+ void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up images in the graphics pipeline.
+ void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up constant buffers in the compute pipeline.
+ void SetupComputeConstBuffers(const ShaderEntries& entries);
+
+    /// Sets up global buffers in the compute pipeline.
+ void SetupComputeGlobalBuffers(const ShaderEntries& entries);
+
+    /// Sets up texel buffers in the compute pipeline.
+ void SetupComputeTexelBuffers(const ShaderEntries& entries);
+
+    /// Sets up textures in the compute pipeline.
+ void SetupComputeTextures(const ShaderEntries& entries);
+
+    /// Sets up images in the compute pipeline.
+ void SetupComputeImages(const ShaderEntries& entries);
+
+ void SetupConstBuffer(const ConstBufferEntry& entry,
+ const Tegra::Engines::ConstBufferInfo& buffer);
+
+ void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
+
+ void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
+
+ void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
+
+ void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
+
+ void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu);
+ void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu);
+ void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu);
+ void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu);
+ void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu);
+ void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu);
+
+ std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
+
+ std::size_t CalculateComputeStreamBufferSize() const;
+
+ std::size_t CalculateVertexArraysSize() const;
+
+ std::size_t CalculateIndexBufferSize() const;
+
+ std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
+ const Tegra::Engines::ConstBufferInfo& buffer) const;
+
+ RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
+
+ Core::System& system;
+ Core::Frontend::EmuWindow& render_window;
+ VKScreenInfo& screen_info;
+ const VKDevice& device;
+ VKResourceManager& resource_manager;
+ VKMemoryManager& memory_manager;
+ VKScheduler& scheduler;
+
+ VKStagingBufferPool staging_pool;
+ VKDescriptorPool descriptor_pool;
+ VKUpdateDescriptorQueue update_descriptor_queue;
+ QuadArrayPass quad_array_pass;
+ Uint8Pass uint8_pass;
+
+ VKTextureCache texture_cache;
+ VKPipelineCache pipeline_cache;
+ VKBufferCache buffer_cache;
+ VKSamplerCache sampler_cache;
+
+ std::array<View, Maxwell::NumRenderTargets> color_attachments;
+ View zeta_attachment;
+
+ std::vector<ImageView> sampled_views;
+ std::vector<ImageView> image_views;
+
+ u32 draw_counter = 0;
+
+ // TODO(Rodrigo): Invalidate on image destruction
+ std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
new file mode 100644
index 000000000..93f5d7ba0
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -0,0 +1,100 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <vector>
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+
+namespace Vulkan {
+
+VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
+
+VKRenderPassCache::~VKRenderPassCache() = default;
+
+vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
+ const auto [pair, is_cache_miss] = cache.try_emplace(params);
+ auto& entry = pair->second;
+ if (is_cache_miss) {
+ entry = CreateRenderPass(params);
+ }
+ return *entry;
+}
+
+UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
+ std::vector<vk::AttachmentDescription> descriptors;
+ std::vector<vk::AttachmentReference> color_references;
+
+ for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
+ const auto attachment = params.color_attachments[rt];
+ const auto format =
+ MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
+ ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
+ static_cast<u32>(attachment.pixel_format));
+
+ // TODO(Rodrigo): Add eMayAlias when it's needed.
+ const auto color_layout = attachment.is_texception
+ ? vk::ImageLayout::eGeneral
+ : vk::ImageLayout::eColorAttachmentOptimal;
+ descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format,
+ vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
+ vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
+ vk::AttachmentStoreOp::eDontCare, color_layout, color_layout);
+ color_references.emplace_back(static_cast<u32>(rt), color_layout);
+ }
+
+ vk::AttachmentReference zeta_attachment_ref;
+ if (params.has_zeta) {
+ const auto format =
+ MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
+ ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
+ static_cast<u32>(params.zeta_pixel_format));
+
+ const auto zeta_layout = params.zeta_texception
+ ? vk::ImageLayout::eGeneral
+ : vk::ImageLayout::eDepthStencilAttachmentOptimal;
+ descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format,
+ vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
+ vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad,
+ vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout);
+ zeta_attachment_ref =
+ vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout);
+ }
+
+ const vk::SubpassDescription subpass_description(
+ {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()),
+ color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0,
+ nullptr);
+
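+    // External subpass dependency to order work recorded before the renderpass against the
+    // attachment accesses made inside of it.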
+ vk::AccessFlags access;
+ vk::PipelineStageFlags stage;
+ if (!color_references.empty()) {
+ access |=
+ vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
+ stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
+ }
+
+ if (params.has_zeta) {
+ access |= vk::AccessFlagBits::eDepthStencilAttachmentRead |
+ vk::AccessFlagBits::eDepthStencilAttachmentWrite;
+ stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
+ }
+
+ const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access,
+ {});
+
+ const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()),
+ descriptors.data(), 1, &subpass_description, 1,
+ &subpass_dependency);
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ return dev.createRenderPassUnique(create_info, nullptr, dld);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
new file mode 100644
index 000000000..b49b2db48
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
@@ -0,0 +1,97 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <tuple>
+#include <unordered_map>
+
+#include <boost/container/static_vector.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/surface.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+// TODO(Rodrigo): Optimize this structure for faster hashing
+
+struct RenderPassParams {
+ struct ColorAttachment {
+ u32 index = 0;
+ VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
+ bool is_texception = false;
+
+ std::size_t Hash() const noexcept {
+ return static_cast<std::size_t>(pixel_format) |
+ static_cast<std::size_t>(is_texception) << 6 |
+ static_cast<std::size_t>(index) << 7;
+ }
+
+ bool operator==(const ColorAttachment& rhs) const noexcept {
+ return std::tie(index, pixel_format, is_texception) ==
+ std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
+ }
+ };
+
+ boost::container::static_vector<ColorAttachment,
+ Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
+ color_attachments{};
+ // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
+ VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
+ bool has_zeta = false;
+ bool zeta_texception = false;
+
+ std::size_t Hash() const noexcept {
+ std::size_t hash = 0;
+ for (const auto& rt : color_attachments) {
+ boost::hash_combine(hash, rt.Hash());
+ }
+ boost::hash_combine(hash, zeta_pixel_format);
+ boost::hash_combine(hash, has_zeta);
+ boost::hash_combine(hash, zeta_texception);
+ return hash;
+ }
+
+ bool operator==(const RenderPassParams& rhs) const {
+ return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
+ std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
+ rhs.zeta_texception);
+ }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::RenderPassParams> {
+ std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
+ return k.Hash();
+ }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class VKRenderPassCache final {
+public:
+ explicit VKRenderPassCache(const VKDevice& device);
+ ~VKRenderPassCache();
+
+ vk::RenderPass GetRenderPass(const RenderPassParams& params);
+
+private:
+ UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const;
+
+ const VKDevice& device;
+ std::unordered_map<RenderPassParams, UniqueRenderPass> cache;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 1ce583f75..0a8ec8398 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
{}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
MaxwellToVK::Sampler::Filter(tsc.min_filter),
MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
- MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter),
- MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter),
- MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
+ MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+ MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+ MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a8baf91de..0cf97cafa 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -954,6 +954,10 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
+ if (const auto amend_index = operation->GetAmendIndex()) {
+ [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
+ ASSERT(type == Type::Void);
+ }
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
@@ -1142,6 +1146,10 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ if (const auto amend_index = conditional->GetAmendIndex()) {
+ [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
+ ASSERT(type == Type::Void);
+ }
// It's invalid to call conditional on nested nodes, use an operation instead
const Id true_label = OpLabel();
const Id skip_label = OpLabel();
@@ -1788,6 +1796,11 @@ private:
return {};
}
+ Expression UAtomicAdd(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Expression Branch(Operation operation) {
const auto& target = std::get<ImmediateNode>(*operation[0]);
OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -2365,6 +2378,8 @@ private:
&SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange,
+ &SPIRVDecompiler::UAtomicAdd,
+
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,
&SPIRVDecompiler::PushFlowStack,
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
new file mode 100644
index 000000000..b97c4cb3d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -0,0 +1,34 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+
+namespace Vulkan {
+
+UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) {
+    // The incoming code pointer may not be aligned for u32 access; copy it to a properly aligned
+    // staging allocation to avoid undefined behavior.
+ ASSERT(code_size % sizeof(u32) == 0);
+ const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
+ std::memcpy(data.get(), code_data, code_size);
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get());
+ vk::ShaderModule shader_module;
+ if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) {
+ UNREACHABLE_MSG("Shader module failed to build!");
+ }
+
+ return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld));
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
new file mode 100644
index 000000000..c06d65970
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -0,0 +1,17 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 02310375f..4d9488f49 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -13,6 +13,7 @@
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 62f1427f5..d48d3b44c 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -3,86 +3,144 @@
// Refer to the license.txt file included.
#include <algorithm>
-#include <memory>
#include <optional>
+#include <tuple>
#include <vector>
+#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
+namespace {
+
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
- vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
- : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
- pipeline_stage} {
- CreateBuffers(memory_manager, usage);
- ReserveWatches(WATCHES_INITIAL_RESERVE);
+constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
+
+std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
+ vk::MemoryPropertyFlags wanted) {
+ const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
+ for (u32 i = 0; i < properties.memoryTypeCount; i++) {
+ if (!(filter & (1 << i))) {
+ continue;
+ }
+ if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
+ return i;
+ }
+ }
+ return {};
+}
+
+} // Anonymous namespace
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+ vk::BufferUsageFlags usage)
+ : device{device}, scheduler{scheduler} {
+ CreateBuffers(usage);
+ ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
+ ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
-std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
- ASSERT(size <= buffer_size);
+std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
+ ASSERT(size <= STREAM_BUFFER_SIZE);
mapped_size = size;
- if (offset + size > buffer_size) {
- // The buffer would overflow, save the amount of used buffers, signal an invalidation and
- // reset the state.
- invalidation_mark = used_watches;
- used_watches = 0;
+ if (alignment > 0) {
+ offset = Common::AlignUp(offset, alignment);
+ }
+
+ WaitPendingOperations(offset);
+
+ bool invalidated = false;
+ if (offset + size > STREAM_BUFFER_SIZE) {
+        // The buffer would overflow; save the number of used watches and reset the state.
+ invalidation_mark = current_watch_cursor;
+ current_watch_cursor = 0;
offset = 0;
+
+ // Swap watches and reset waiting cursors.
+ std::swap(previous_watches, current_watches);
+ wait_cursor = 0;
+ wait_bound = 0;
+
+ // Ensure that we don't wait for uncommitted fences.
+ scheduler.Flush();
+
+ invalidated = true;
}
- return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
+ return {pointer, offset, invalidated};
}
-void VKStreamBuffer::Send(u64 size) {
+void VKStreamBuffer::Unmap(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
- if (invalidation_mark) {
- // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
- scheduler.Flush();
- std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
- [&](auto& resource) { resource->Wait(); });
- invalidation_mark = std::nullopt;
- }
+ const auto dev = device.GetLogical();
+ dev.unmapMemory(*memory, device.GetDispatchLoader());
+
+ offset += size;
- if (used_watches + 1 >= watches.size()) {
+ if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
- ReserveWatches(WATCHES_RESERVE_CHUNK);
+ ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
- // Add a watch for this allocation.
- watches[used_watches++]->Watch(scheduler.GetFence());
-
- offset += size;
+ auto& watch = current_watches[current_watch_cursor++];
+ watch.upper_bound = offset;
+ watch.fence.Watch(scheduler.GetFence());
}
-void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
- const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
- nullptr);
-
+void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
+ const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
+ 0, nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
- commit = memory_manager.Commit(*buffer, true);
- mapped_pointer = commit->GetData();
+
+ const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
+ // Prefer device local host visible allocations (this should hit AMD's pinned memory).
+ auto type = FindMemoryType(device, requirements.memoryTypeBits,
+ vk::MemoryPropertyFlagBits::eHostVisible |
+ vk::MemoryPropertyFlagBits::eHostCoherent |
+ vk::MemoryPropertyFlagBits::eDeviceLocal);
+ if (!type) {
+ // Otherwise search for a host visible allocation.
+ type = FindMemoryType(device, requirements.memoryTypeBits,
+ vk::MemoryPropertyFlagBits::eHostVisible |
+ vk::MemoryPropertyFlagBits::eHostCoherent);
+ ASSERT_MSG(type, "No host visible and coherent memory type found");
+ }
+ const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
+ memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
+
+ dev.bindBufferMemory(*buffer, *memory, 0, dld);
}
-void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
- const std::size_t previous_size = watches.size();
- watches.resize(previous_size + grow_size);
- std::generate(watches.begin() + previous_size, watches.end(),
- []() { return std::make_unique<VKFenceWatch>(); });
+void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
+ watches.resize(watches.size() + grow_size);
+}
+
+void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
+ if (!invalidation_mark) {
+ return;
+ }
+ while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
+ auto& watch = previous_watches[wait_cursor];
+ wait_bound = watch.upper_bound;
+ watch.fence.Wait();
+ ++wait_cursor;
+ }
}
} // namespace Vulkan
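
The rewritten stream buffer behaves as a ring over a fixed 256 MiB allocation: Map aligns the cursor, is meant to wait on fences recorded in the previous cycle before their region is reused, and wraps the cursor to zero (signalling an invalidation) when the request would overflow; Unmap advances the cursor and tags a watch with the new upper bound. Below is a host-only model of the cursor bookkeeping with the fence handling left out; it assumes AlignUp matches Common::AlignUp and the buffer size matches STREAM_BUFFER_SIZE above.

    #include <cstdint>
    #include <iostream>

    constexpr std::uint64_t kStreamBufferSize = 256 * 1024 * 1024;

    std::uint64_t AlignUp(std::uint64_t value, std::uint64_t alignment) {
        return alignment ? (value + alignment - 1) / alignment * alignment : value;
    }

    int main() {
        std::uint64_t offset = 0;
        const std::uint64_t requests[] = {0x8000000, 0x8000000, 0x100};
        for (const std::uint64_t size : requests) {
            offset = AlignUp(offset, 0x100);
            const bool invalidated = offset + size > kStreamBufferSize;
            if (invalidated) {
                offset = 0; // The real code also swaps the watch vectors and flushes the scheduler
            }
            std::cout << "Map -> offset 0x" << std::hex << offset
                      << (invalidated ? " (invalidated)\n" : "\n");
            offset += size; // Unmap advances the cursor
        }
    }
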
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 842e54162..187c0c612 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -4,28 +4,24 @@
#pragma once
-#include <memory>
#include <optional>
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKFenceWatch;
-class VKResourceManager;
class VKScheduler;
-class VKStreamBuffer {
+class VKStreamBuffer final {
public:
- explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
- vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+ explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+ vk::BufferUsageFlags usage);
~VKStreamBuffer();
/**
@@ -34,39 +30,47 @@ public:
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
* offset and a boolean that's true when buffer has been invalidated.
*/
- std::tuple<u8*, u64, bool> Reserve(u64 size);
+ std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
- void Send(u64 size);
+ void Unmap(u64 size);
- vk::Buffer GetBuffer() const {
+ vk::Buffer GetHandle() const {
return *buffer;
}
private:
+ struct Watch final {
+ VKFenceWatch fence;
+ u64 upper_bound{};
+ };
+
    /// Creates Vulkan buffer handles committing the required memory.
- void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+ void CreateBuffers(vk::BufferUsageFlags usage);
/// Increases the amount of watches available.
- void ReserveWatches(std::size_t grow_size);
+ void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
+
+ void WaitPendingOperations(u64 requested_upper_bound);
const VKDevice& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
- const u64 buffer_size; ///< Total size of the stream buffer.
const vk::AccessFlags access; ///< Access usage of this stream buffer.
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
- UniqueBuffer buffer; ///< Mapped buffer.
- VKMemoryCommit commit; ///< Memory commit.
- u8* mapped_pointer{}; ///< Pointer to the host visible commit
+ UniqueBuffer buffer; ///< Mapped buffer.
+ UniqueDeviceMemory memory; ///< Memory allocation.
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
- std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
- std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
- std::optional<std::size_t>
- invalidation_mark{}; ///< Number of watches used in the current invalidation.
+ std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
+ std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
+ std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
+
+ std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
+ std::size_t wait_cursor{}; ///< Last watch being waited for completion.
+ u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan
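
For reference, a hypothetical caller of the interface declared above follows a map/copy/unmap protocol per upload. This is a sketch only: it assumes the project's common integer aliases (u8, u64) and the VKStreamBuffer class above are visible, and the function and variable names are placeholders rather than code from this change.

    #include <cstring>

    void UploadThroughStream(Vulkan::VKStreamBuffer& stream, const u8* data, u64 size) {
        const auto [pointer, offset, invalidated] = stream.Map(size, 4);
        if (invalidated) {
            // Offsets handed out before this call no longer point at live data.
        }
        std::memcpy(pointer, data, size);
        stream.Unmap(size);
        // The upload now lives at `offset` within stream.GetHandle() and can be bound from there.
    }
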
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
new file mode 100644
index 000000000..51b0d38a6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -0,0 +1,475 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <variant>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/morton.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/surface.h"
+#include "video_core/textures/convert.h"
+
+namespace Vulkan {
+
+using VideoCore::MortonSwizzle;
+using VideoCore::MortonSwizzleMode;
+
+using Tegra::Texture::SwizzleSource;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::SurfaceCompression;
+using VideoCore::Surface::SurfaceTarget;
+
+namespace {
+
+vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
+ switch (target) {
+ case SurfaceTarget::Texture1D:
+ case SurfaceTarget::Texture1DArray:
+ return vk::ImageType::e1D;
+ case SurfaceTarget::Texture2D:
+ case SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubeArray:
+ return vk::ImageType::e2D;
+ case SurfaceTarget::Texture3D:
+ return vk::ImageType::e3D;
+ }
+ UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
+ return {};
+}
+
+vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) {
+ if (pixel_format < PixelFormat::MaxColorFormat) {
+ return vk::ImageAspectFlagBits::eColor;
+ } else if (pixel_format < PixelFormat::MaxDepthFormat) {
+ return vk::ImageAspectFlagBits::eDepth;
+ } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) {
+ return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
+ } else {
+ UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format));
+ return vk::ImageAspectFlagBits::eColor;
+ }
+}
+
+vk::ImageViewType GetImageViewType(SurfaceTarget target) {
+ switch (target) {
+ case SurfaceTarget::Texture1D:
+ return vk::ImageViewType::e1D;
+ case SurfaceTarget::Texture2D:
+ return vk::ImageViewType::e2D;
+ case SurfaceTarget::Texture3D:
+ return vk::ImageViewType::e3D;
+ case SurfaceTarget::Texture1DArray:
+ return vk::ImageViewType::e1DArray;
+ case SurfaceTarget::Texture2DArray:
+ return vk::ImageViewType::e2DArray;
+ case SurfaceTarget::TextureCubemap:
+ return vk::ImageViewType::eCube;
+ case SurfaceTarget::TextureCubeArray:
+ return vk::ImageViewType::eCubeArray;
+ case SurfaceTarget::TextureBuffer:
+ break;
+ }
+ UNREACHABLE();
+ return {};
+}
+
+UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
+ // TODO(Rodrigo): Move texture buffer creation to the buffer cache
+ const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(),
+ vk::BufferUsageFlagBits::eUniformTexelBuffer |
+ vk::BufferUsageFlagBits::eTransferSrc |
+ vk::BufferUsageFlagBits::eTransferDst,
+ vk::SharingMode::eExclusive, 0, nullptr);
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ return dev.createBufferUnique(buffer_ci, nullptr, dld);
+}
+
+vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
+ const SurfaceParams& params,
+ vk::Buffer buffer) {
+ ASSERT(params.IsBuffer());
+
+ const auto format =
+ MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
+ return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes());
+}
+
+vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
+ constexpr auto sample_count = vk::SampleCountFlagBits::e1;
+ constexpr auto tiling = vk::ImageTiling::eOptimal;
+
+ ASSERT(!params.IsBuffer());
+
+ const auto [format, attachable, storage] =
+ MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
+
+ auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst |
+ vk::ImageUsageFlagBits::eTransferSrc;
+ if (attachable) {
+ image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment
+ : vk::ImageUsageFlagBits::eColorAttachment;
+ }
+ if (storage) {
+ image_usage |= vk::ImageUsageFlagBits::eStorage;
+ }
+
+ vk::ImageCreateFlags flags;
+ vk::Extent3D extent;
+ switch (params.target) {
+ case SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubeArray:
+ flags |= vk::ImageCreateFlagBits::eCubeCompatible;
+ [[fallthrough]];
+ case SurfaceTarget::Texture1D:
+ case SurfaceTarget::Texture1DArray:
+ case SurfaceTarget::Texture2D:
+ case SurfaceTarget::Texture2DArray:
+ extent = vk::Extent3D(params.width, params.height, 1);
+ break;
+ case SurfaceTarget::Texture3D:
+ extent = vk::Extent3D(params.width, params.height, params.depth);
+ break;
+ case SurfaceTarget::TextureBuffer:
+ UNREACHABLE();
+ }
+
+ return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent,
+ params.num_levels, static_cast<u32>(params.GetNumLayers()),
+ sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0,
+ nullptr, vk::ImageLayout::eUndefined);
+}
+
+} // Anonymous namespace
+
+CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
+ VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
+ GPUVAddr gpu_addr, const SurfaceParams& params)
+ : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
+ resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
+ staging_pool{staging_pool} {
+ if (params.IsBuffer()) {
+ buffer = CreateBuffer(device, params);
+ commit = memory_manager.Commit(*buffer, false);
+
+ const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
+ format = buffer_view_ci.format;
+
+ const auto dev = device.GetLogical();
+ const auto& dld = device.GetDispatchLoader();
+ buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld);
+ } else {
+ const auto image_ci = GenerateImageCreateInfo(device, params);
+ format = image_ci.format;
+
+ image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format));
+ commit = memory_manager.Commit(image->GetHandle(), false);
+ }
+
+ // TODO(Rodrigo): Move this to a virtual function.
+ main_view = CreateViewInner(
+ ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
+ true);
+}
+
+CachedSurface::~CachedSurface() = default;
+
+void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
+ // To upload data we have to be outside of a renderpass
+ scheduler.RequestOutsideRenderPassOperationContext();
+
+ if (params.IsBuffer()) {
+ UploadBuffer(staging_buffer);
+ } else {
+ UploadImage(staging_buffer);
+ }
+}
+
+void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
+ UNIMPLEMENTED_IF(params.IsBuffer());
+
+ if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+ LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
+ }
+
+ // We can't copy images to buffers inside a renderpass
+ scheduler.RequestOutsideRenderPassOperationContext();
+
+ FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
+ vk::ImageLayout::eTransferSrcOptimal);
+
+ const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
+ // TODO(Rodrigo): Do this in a single copy
+ for (u32 level = 0; level < params.num_levels; ++level) {
+ scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle,
+ copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) {
+ cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy},
+ dld);
+ });
+ }
+ scheduler.Finish();
+
+    // TODO(Rodrigo): Use an internal buffer for staging buffers and avoid this unnecessary memcpy.
+ std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size);
+}
+
+void CachedSurface::DecorateSurfaceName() {
+ // TODO(Rodrigo): Add name decorations
+}
+
+View CachedSurface::CreateView(const ViewParams& params) {
+ return CreateViewInner(params, false);
+}
+
+View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
+ // TODO(Rodrigo): Add name decorations
+ return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
+}
+
+void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
+ const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
+ std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
+
+ scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
+ size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) {
+ const vk::BufferCopy copy(0, 0, size);
+ cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);
+
+ cmdbuf.pipelineBarrier(
+ vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {},
+ {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite,
+ vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)},
+ {}, dld);
+ });
+}
+
+void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
+ const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
+ std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
+
+ FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite,
+ vk::ImageLayout::eTransferDstOptimal);
+
+ for (u32 level = 0; level < params.num_levels; ++level) {
+ vk::BufferImageCopy copy = GetBufferImageCopy(level);
+ const auto& dld = device.GetDispatchLoader();
+ if (image->GetAspectMask() ==
+ (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
+ vk::BufferImageCopy depth = copy;
+ vk::BufferImageCopy stencil = copy;
+ depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
+ stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
+ scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth,
+ stencil](auto cmdbuf, auto& dld) {
+ cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
+ {depth, stencil}, dld);
+ });
+ } else {
+ scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(),
+ copy](auto cmdbuf, auto& dld) {
+ cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
+ {copy}, dld);
+ });
+ }
+ }
+}
+
+vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
+ const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
+ const auto compression_type = params.GetCompressionType();
+ const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
+ ? params.GetConvertedMipmapOffset(level)
+ : params.GetHostMipmapLevelOffset(level);
+
+ return vk::BufferImageCopy(
+ mip_offset, 0, 0,
+ {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0},
+ {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth});
+}
+
+vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
+ return {image->GetAspectMask(), 0, params.num_levels, 0,
+ static_cast<u32>(params.GetNumLayers())};
+}
+
+CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
+ const ViewParams& params, bool is_proxy)
+ : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
+ image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
+ aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
+ base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
+ num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
+ : vk::ImageViewType{}} {}
+
+CachedSurfaceView::~CachedSurfaceView() = default;
+
+vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
+ SwizzleSource z_source, SwizzleSource w_source) {
+ const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
+ if (last_image_view && last_swizzle == swizzle) {
+ return last_image_view;
+ }
+ last_swizzle = swizzle;
+
+ const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle);
+ auto& image_view = entry->second;
+ if (!is_cache_miss) {
+ return last_image_view = *image_view;
+ }
+
+ auto swizzle_x = MaxwellToVK::SwizzleSource(x_source);
+ auto swizzle_y = MaxwellToVK::SwizzleSource(y_source);
+ auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
+ auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
+
+ if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+        // A1B5G5R5 is implemented as A1R5G5B5, so we have to change the swizzle here.
+ std::swap(swizzle_x, swizzle_z);
+ }
+
+ // Games can sample depth or stencil values on textures. This is decided by the swizzle value on
+ // hardware. To emulate this on Vulkan we specify it in the aspect.
+ vk::ImageAspectFlags aspect = aspect_mask;
+ if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
+ UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
+ const bool is_first = x_source == SwizzleSource::R;
+ switch (params.pixel_format) {
+ case VideoCore::Surface::PixelFormat::Z24S8:
+ case VideoCore::Surface::PixelFormat::Z32FS8:
+ aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil;
+ break;
+ case VideoCore::Surface::PixelFormat::S8Z24:
+ aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth;
+ break;
+ default:
+ aspect = vk::ImageAspectFlagBits::eDepth;
+ UNIMPLEMENTED();
+ }
+
+        // Vulkan doesn't seem to understand swizzling of a depth-stencil image; use the identity swizzle
+ swizzle_x = vk::ComponentSwizzle::eR;
+ swizzle_y = vk::ComponentSwizzle::eG;
+ swizzle_z = vk::ComponentSwizzle::eB;
+ swizzle_w = vk::ComponentSwizzle::eA;
+ }
+
+ const vk::ImageViewCreateInfo image_view_ci(
+ {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(),
+ {swizzle_x, swizzle_y, swizzle_z, swizzle_w},
+ {aspect, base_level, num_levels, base_layer, num_layers});
+
+ const auto dev = device.GetLogical();
+ image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader());
+ return last_image_view = *image_view;
+}
+
+VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ const VKDevice& device, VKResourceManager& resource_manager,
+ VKMemoryManager& memory_manager, VKScheduler& scheduler,
+ VKStagingBufferPool& staging_pool)
+ : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
+ memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
+
+VKTextureCache::~VKTextureCache() = default;
+
+Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
+ return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
+ scheduler, staging_pool, gpu_addr, params);
+}
+
+void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
+ const VideoCommon::CopyParams& copy_params) {
+ const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
+ const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
+ UNIMPLEMENTED_IF(src_3d);
+
+    // The texture cache handles depth in OpenGL terms; here it has to be mapped to either a
+    // subresource (array layers) or a dimension (3D depth), respectively.
+ const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z;
+ const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0;
+
+ const u32 extent_z = dst_3d ? copy_params.depth : 1;
+ const u32 num_layers = dst_3d ? 1 : copy_params.depth;
+
+ // We can't copy inside a renderpass
+ scheduler.RequestOutsideRenderPassOperationContext();
+
+ src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
+ vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
+ vk::ImageLayout::eTransferSrcOptimal);
+ dst_surface->Transition(
+ dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
+ vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
+
+ const auto& dld{device.GetDispatchLoader()};
+ const vk::ImageSubresourceLayers src_subresource(
+ src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
+ const vk::ImageSubresourceLayers dst_subresource(
+ dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers);
+ const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0);
+ const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z);
+ const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z);
+ const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent);
+ const vk::Image src_image = src_surface->GetImageHandle();
+ const vk::Image dst_image = dst_surface->GetImageHandle();
+ scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) {
+ cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
+ vk::ImageLayout::eTransferDstOptimal, {copy}, dld);
+ });
+}
+
+void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
+ const Tegra::Engines::Fermi2D::Config& copy_config) {
+ // We can't blit inside a renderpass
+ scheduler.RequestOutsideRenderPassOperationContext();
+
+ src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer,
+ vk::AccessFlagBits::eTransferRead);
+ dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer,
+ vk::AccessFlagBits::eTransferWrite);
+
+ const auto& cfg = copy_config;
+ const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0);
+ const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1);
+ const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0);
+ const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1);
+ const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right},
+ dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
+ const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
+
+ const auto& dld{device.GetDispatchLoader()};
+ scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
+ is_linear](auto cmdbuf, auto& dld) {
+ cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
+ vk::ImageLayout::eTransferDstOptimal, {blit},
+ is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld);
+ });
+}
+
+void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) {
+ // Currently unimplemented. PBO copies should be dropped and we should use a render pass to
+ // convert from color to depth and viceversa.
+ LOG_WARNING(Render_Vulkan, "Unimplemented");
+}
+
+} // namespace Vulkan
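
One subtlety above is CachedSurfaceView::GetHandle: when a guest samples a combined depth-stencil texture, the swizzle's first source decides whether depth or stencil is read, and the Vulkan view has to encode that choice in its aspect mask while keeping an identity component mapping. A standalone restatement of that selection for the two common cases, using real Vulkan formats as stand-ins for the guest pixel formats (other formats are left out of this sketch):

    #include <vulkan/vulkan.hpp>

    // `first_component` corresponds to the guest X swizzle selecting the format's first
    // component, which is depth for Z24S8 and Z32FS8.
    vk::ImageAspectFlagBits SampledAspect(vk::Format format, bool first_component) {
        switch (format) {
        case vk::Format::eD24UnormS8Uint:  // Z24S8
        case vk::Format::eD32SfloatS8Uint: // Z32FS8
            return first_component ? vk::ImageAspectFlagBits::eDepth
                                   : vk::ImageAspectFlagBits::eStencil;
        default:
            return vk::ImageAspectFlagBits::eDepth;
        }
    }
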
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
new file mode 100644
index 000000000..d3edbe80c
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -0,0 +1,239 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/texture_cache/surface_base.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/textures/decoders.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Vulkan {
+
+class RasterizerVulkan;
+class VKDevice;
+class VKResourceManager;
+class VKScheduler;
+class VKStagingBufferPool;
+
+class CachedSurfaceView;
+class CachedSurface;
+
+using Surface = std::shared_ptr<CachedSurface>;
+using View = std::shared_ptr<CachedSurfaceView>;
+using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
+
+using VideoCommon::SurfaceParams;
+using VideoCommon::ViewParams;
+
+class CachedSurface final : public VideoCommon::SurfaceBase<View> {
+ friend CachedSurfaceView;
+
+public:
+ explicit CachedSurface(Core::System& system, const VKDevice& device,
+ VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
+ GPUVAddr gpu_addr, const SurfaceParams& params);
+ ~CachedSurface();
+
+ void UploadTexture(const std::vector<u8>& staging_buffer) override;
+ void DownloadTexture(std::vector<u8>& staging_buffer) override;
+
+ void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+ vk::ImageLayout new_layout) {
+ image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
+ new_stage_mask, new_access, new_layout);
+ }
+
+ void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+ vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+ vk::ImageLayout new_layout) {
+ image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
+ new_access, new_layout);
+ }
+
+ VKImage& GetImage() {
+ return *image;
+ }
+
+ const VKImage& GetImage() const {
+ return *image;
+ }
+
+ vk::Image GetImageHandle() const {
+ return image->GetHandle();
+ }
+
+ vk::ImageAspectFlags GetAspectMask() const {
+ return image->GetAspectMask();
+ }
+
+ vk::BufferView GetBufferViewHandle() const {
+ return *buffer_view;
+ }
+
+protected:
+ void DecorateSurfaceName();
+
+ View CreateView(const ViewParams& params) override;
+ View CreateViewInner(const ViewParams& params, bool is_proxy);
+
+private:
+ void UploadBuffer(const std::vector<u8>& staging_buffer);
+
+ void UploadImage(const std::vector<u8>& staging_buffer);
+
+ vk::BufferImageCopy GetBufferImageCopy(u32 level) const;
+
+ vk::ImageSubresourceRange GetImageSubresourceRange() const;
+
+ Core::System& system;
+ const VKDevice& device;
+ VKResourceManager& resource_manager;
+ VKMemoryManager& memory_manager;
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_pool;
+
+ std::optional<VKImage> image;
+ UniqueBuffer buffer;
+ UniqueBufferView buffer_view;
+ VKMemoryCommit commit;
+
+ vk::Format format;
+};
+
+class CachedSurfaceView final : public VideoCommon::ViewBase {
+public:
+ explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
+ const ViewParams& params, bool is_proxy);
+ ~CachedSurfaceView();
+
+ vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
+ Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source,
+ Tegra::Texture::SwizzleSource w_source);
+
+ bool IsSameSurface(const CachedSurfaceView& rhs) const {
+ return &surface == &rhs.surface;
+ }
+
+ vk::ImageView GetHandle() {
+ return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
+ Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
+ }
+
+ u32 GetWidth() const {
+ return params.GetMipWidth(base_level);
+ }
+
+ u32 GetHeight() const {
+ return params.GetMipHeight(base_level);
+ }
+
+ bool IsBufferView() const {
+ return buffer_view;
+ }
+
+ vk::Image GetImage() const {
+ return image;
+ }
+
+ vk::BufferView GetBufferView() const {
+ return buffer_view;
+ }
+
+ vk::ImageSubresourceRange GetImageSubresourceRange() const {
+ return {aspect_mask, base_level, num_levels, base_layer, num_layers};
+ }
+
+ vk::ImageSubresourceLayers GetImageSubresourceLayers() const {
+ return {surface.GetAspectMask(), base_level, base_layer, num_layers};
+ }
+
+ void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask,
+ vk::AccessFlags new_access) const {
+ surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
+ new_access, new_layout);
+ }
+
+ void MarkAsModified(u64 tick) {
+ surface.MarkAsModified(true, tick);
+ }
+
+private:
+ static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
+ Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source,
+ Tegra::Texture::SwizzleSource w_source) {
+ return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+ (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+ }
+
+ // Store a copy of these values to avoid double dereference when reading them
+ const SurfaceParams params;
+ const vk::Image image;
+ const vk::BufferView buffer_view;
+ const vk::ImageAspectFlags aspect_mask;
+
+ const VKDevice& device;
+ CachedSurface& surface;
+ const u32 base_layer;
+ const u32 num_layers;
+ const u32 base_level;
+ const u32 num_levels;
+ const vk::ImageViewType image_view_type;
+
+ vk::ImageView last_image_view;
+ u32 last_swizzle{};
+
+ std::unordered_map<u32, UniqueImageView> view_cache;
+};
+
+class VKTextureCache final : public TextureCacheBase {
+public:
+ explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ const VKDevice& device, VKResourceManager& resource_manager,
+ VKMemoryManager& memory_manager, VKScheduler& scheduler,
+ VKStagingBufferPool& staging_pool);
+ ~VKTextureCache();
+
+private:
+ Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
+
+ void ImageCopy(Surface& src_surface, Surface& dst_surface,
+ const VideoCommon::CopyParams& copy_params) override;
+
+ void ImageBlit(View& src_view, View& dst_view,
+ const Tegra::Engines::Fermi2D::Config& copy_config) override;
+
+ void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
+
+ const VKDevice& device;
+ VKResourceManager& resource_manager;
+ VKMemoryManager& memory_manager;
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_pool;
+};
+
+} // namespace Vulkan
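
The per-view cache above keys image views by the result of EncodeSwizzle, which packs the four 8-bit swizzle selectors into one u32 so an unordered_map lookup can replace view recreation. A standalone check that the packing is lossless (the selector values are arbitrary examples):

    #include <cassert>
    #include <cstdint>

    constexpr std::uint32_t EncodeSwizzle(std::uint8_t x, std::uint8_t y, std::uint8_t z,
                                          std::uint8_t w) {
        return (std::uint32_t{x} << 24) | (std::uint32_t{y} << 16) | (std::uint32_t{z} << 8) | w;
    }

    int main() {
        const std::uint32_t key = EncodeSwizzle(2, 3, 4, 5);
        assert(((key >> 24) & 0xFF) == 2);
        assert(((key >> 16) & 0xFF) == 3);
        assert(((key >> 8) & 0xFF) == 4);
        assert((key & 0xFF) == 5);
    }
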
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
new file mode 100644
index 000000000..0e577b9ff
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -0,0 +1,57 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <variant>
+#include <boost/container/static_vector.hpp>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Vulkan {
+
+VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler)
+ : device{device}, scheduler{scheduler} {}
+
+VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
+
+void VKUpdateDescriptorQueue::TickFrame() {
+ payload.clear();
+}
+
+void VKUpdateDescriptorQueue::Acquire() {
+ entries.clear();
+}
+
+void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template,
+ vk::DescriptorSet set) {
+ if (payload.size() + entries.size() >= payload.max_size()) {
+ LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
+ scheduler.WaitWorker();
+ payload.clear();
+ }
+
+ const auto payload_start = payload.data() + payload.size();
+ for (const auto& entry : entries) {
+ if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) {
+ payload.push_back(*image);
+ } else if (const auto buffer = std::get_if<Buffer>(&entry)) {
+ payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
+ } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) {
+ payload.push_back(*texel);
+ } else {
+ UNREACHABLE();
+ }
+ }
+
+ scheduler.Record([dev = device.GetLogical(), payload_start, set,
+ update_template]([[maybe_unused]] auto cmdbuf, auto& dld) {
+ dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld);
+ });
+}
+
+} // namespace Vulkan
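
VKUpdateDescriptorQueue splits descriptor updates into two stages: Acquire and the Add* helpers collect typed entries for one draw into a std::variant vector, and Send flattens them into a long-lived payload whose address is captured by the deferred updateDescriptorSetWithTemplate call. A host-only model of that flattening step, with plain structs standing in for the Vulkan descriptor info types:

    #include <cstdint>
    #include <iostream>
    #include <variant>
    #include <vector>

    struct Image { std::uint64_t view; };
    struct Buffer { std::uint64_t handle, offset, size; };
    struct Texel { std::uint64_t view; };
    using Entry = std::variant<Image, Buffer, Texel>;

    int main() {
        const std::vector<Entry> entries{Image{1}, Buffer{2, 0, 256}, Texel{3}};
        std::vector<std::uint64_t> payload; // contiguous storage consumed by the update template
        for (const Entry& entry : entries) {
            if (const auto image = std::get_if<Image>(&entry)) {
                payload.push_back(image->view);
            } else if (const auto buffer = std::get_if<Buffer>(&entry)) {
                payload.push_back(buffer->handle);
            } else if (const auto texel = std::get_if<Texel>(&entry)) {
                payload.push_back(texel->view);
            }
        }
        std::cout << "flattened " << payload.size() << " descriptors\n";
    }
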
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
new file mode 100644
index 000000000..8c825aa29
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -0,0 +1,86 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include <variant>
+#include <boost/container/static_vector.hpp>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKScheduler;
+
+class DescriptorUpdateEntry {
+public:
+ explicit DescriptorUpdateEntry() : image{} {}
+
+ DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {}
+
+ DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size)
+ : buffer{buffer, offset, size} {}
+
+ DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {}
+
+private:
+ union {
+ vk::DescriptorImageInfo image;
+ vk::DescriptorBufferInfo buffer;
+ vk::BufferView texel_buffer;
+ };
+};
+
+class VKUpdateDescriptorQueue final {
+public:
+ explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler);
+ ~VKUpdateDescriptorQueue();
+
+ void TickFrame();
+
+ void Acquire();
+
+ void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set);
+
+ void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) {
+ entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}});
+ }
+
+ void AddImage(vk::ImageView image_view) {
+ entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}});
+ }
+
+ void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) {
+ entries.push_back(Buffer{buffer, offset, size});
+ }
+
+ void AddTexelBuffer(vk::BufferView texel_buffer) {
+ entries.emplace_back(texel_buffer);
+ }
+
+ vk::ImageLayout* GetLastImageLayout() {
+ return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout;
+ }
+
+private:
+ struct Buffer {
+ const vk::Buffer* buffer{};
+ u64 offset{};
+ std::size_t size{};
+ };
+ using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>;
+ // Old gcc versions don't consider this trivially copyable.
+ // static_assert(std::is_trivially_copyable_v<Variant>);
+
+ const VKDevice& device;
+ VKScheduler& scheduler;
+
+ boost::container::static_vector<Variant, 0x400> entries;
+ boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index b427ac873..0229733b6 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -65,7 +65,7 @@ struct BlockInfo {
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
- : program_code{program_code}, start{start}, locker{locker} {}
+ : program_code{program_code}, locker{locker}, start{start} {}
const ProgramCode& program_code;
ConstBufferLocker& locker;
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index c934d0719..7591a715f 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -6,6 +6,7 @@
#include <vector>
#include <fmt/format.h>
+#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -15,6 +16,8 @@
namespace VideoCommon::Shader {
+using Tegra::Shader::AtomicOp;
+using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -22,34 +25,39 @@ using Tegra::Shader::Register;
namespace {
-u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
+bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
+ return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
+ uniform_type == Tegra::Shader::UniformType::UnsignedShort;
+}
+
+u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::UnsignedByte:
- case Tegra::Shader::UniformType::Single:
- return 1;
- case Tegra::Shader::UniformType::Double:
- return 2;
- case Tegra::Shader::UniformType::Quad:
- case Tegra::Shader::UniformType::UnsignedQuad:
- return 4;
+ return 0b11;
+ case Tegra::Shader::UniformType::UnsignedShort:
+ return 0b10;
default:
- UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
- return 1;
+ UNREACHABLE();
+ return 0;
}
}
-u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
+u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
+ case Tegra::Shader::UniformType::UnsignedByte:
+ return 8;
+ case Tegra::Shader::UniformType::UnsignedShort:
+ return 16;
case Tegra::Shader::UniformType::Single:
- return 1;
+ return 32;
case Tegra::Shader::UniformType::Double:
- return 2;
+ return 64;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
- return 4;
+ return 128;
default:
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
- return 1;
+ return 32;
}
}
@@ -184,9 +192,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}();
const auto [real_address_base, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, false);
+ TrackGlobalMemory(bb, instr, true, false);
- const u32 count = GetLdgMemorySize(type);
+ const u32 size = GetMemorySize(type);
+ const u32 count = Common::AlignUp(size, 32) / 32;
if (!real_address_base || !base_address) {
// Tracking failed, load zeroes.
for (u32 i = 0; i < count; ++i) {
@@ -200,14 +209,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- if (type == Tegra::Shader::UniformType::UnsignedByte) {
- // To handle unaligned loads get the byte used to dereferenced global memory
- // and extract that byte from the loaded uint32.
- Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
- byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
+            // To handle unaligned loads, get the bytes used to dereference global memory and
+            // extract those bytes from the loaded u32.
+ if (IsUnaligned(type)) {
+ Node mask = Immediate(GetUnalignedMask(type));
+ Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
+ offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
- gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
- Immediate(8));
+ gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
+ std::move(offset), Immediate(size));
}
SetTemporary(bb, i, gmem);
@@ -295,23 +305,53 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
}();
+ // For unaligned reads we have to read memory too.
+ const bool is_read = IsUnaligned(type);
const auto [real_address_base, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, true);
+ TrackGlobalMemory(bb, instr, is_read, true);
if (!real_address_base || !base_address) {
// Tracking failed, skip the store.
break;
}
- const u32 count = GetStgMemorySize(type);
+ const u32 size = GetMemorySize(type);
+ const u32 count = Common::AlignUp(size, 32) / 32;
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- const Node value = GetRegister(instr.gpr0.Value() + i);
+ Node value = GetRegister(instr.gpr0.Value() + i);
+
+ if (IsUnaligned(type)) {
+ Node mask = Immediate(GetUnalignedMask(type));
+ Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
+ offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
+
+ value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
+ Immediate(size));
+ }
+
bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
}
+ case OpCode::Id::ATOMS: {
+ UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
+ static_cast<int>(instr.atoms.operation.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
+ static_cast<int>(instr.atoms.type.Value()));
+
+ const s32 offset = instr.atoms.GetImmediateOffset();
+ Node address = GetRegister(instr.gpr8);
+ address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
+
+ Node memory = GetSharedMemory(std::move(address));
+ Node data = GetRegister(instr.gpr20);
+
+ Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it.
@@ -336,7 +376,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
Instruction instr,
- bool is_write) {
+ bool is_read, bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
@@ -351,11 +391,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
const GlobalMemoryBase descriptor{index, offset};
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
auto& usage = entry->second;
- if (is_write) {
- usage.is_written = true;
- } else {
- usage.is_read = true;
- }
+ usage.is_written |= is_write;
+ usage.is_read |= is_read;
const auto real_address =
Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
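
The LDG/STG rewrite above handles sub-word accesses by keeping the aligned 32-bit load and extracting (or, for stores, bitfield-inserting) only the addressed bits: the low address bits kept by GetUnalignedMask become a bit offset, and GetMemorySize gives the field width. The same arithmetic on the host, for the 8-bit and 16-bit cases:

    #include <cstdint>
    #include <iostream>

    // Extract `size` bits from an aligned 32-bit word, addressed by the low bits kept by `mask`.
    std::uint32_t ExtractUnaligned(std::uint32_t word, std::uint32_t address, std::uint32_t mask,
                                   std::uint32_t size) {
        const std::uint32_t bit_offset = (address & mask) << 3; // bytes -> bits, as in the IR above
        const std::uint32_t field_mask = size == 32 ? 0xFFFFFFFFu : (1u << size) - 1u;
        return (word >> bit_offset) & field_mask;
    }

    int main() {
        const std::uint32_t word = 0xAABBCCDD;
        std::cout << std::hex
                  << ExtractUnaligned(word, 0x1001, 0b11, 8) << '\n'   // 0xCC (second byte)
                  << ExtractUnaligned(word, 0x1002, 0b10, 16) << '\n'; // 0xAABB (upper half)
    }
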
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 4b14cdf58..cd984f763 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -794,14 +794,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
bool is_tld4) {
- const auto [coord_offsets, size, wrap_value,
- diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
- if (is_tld4) {
- return {{0, 8, 16}, 6, 32, 64};
- } else {
- return {{0, 4, 8}, 4, 8, 16};
- }
- }();
+ const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
+ const u32 size = is_tld4 ? 6 : 4;
+ const s32 wrap_value = is_tld4 ? 32 : 8;
+ const s32 diff_value = is_tld4 ? 64 : 16;
const u32 mask = (1U << size) - 1;
std::vector<Node> aoffi;
@@ -814,7 +810,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
LOG_WARNING(HW_GPU,
"AOFFI constant folding failed, some hardware might have graphical issues");
for (std::size_t coord = 0; coord < coord_count; ++coord) {
- const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
+ const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
const Node condition =
Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
@@ -824,7 +820,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
}
for (std::size_t coord = 0; coord < coord_count; ++coord) {
- s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
+ s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
if (value >= wrap_value) {
value -= diff_value;
}
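
The simplified GetAoffiCoordinates above treats each packed offset as a small two's-complement field: size is the field width (4 bits, or 6 for TLD4), wrap_value is 2^(size-1) and diff_value is 2^size, so any extracted value at or above wrap_value is really negative. The equivalent sign extension spelled out on the host:

    #include <cstdint>
    #include <iostream>

    // Manual sign extension of a `bits`-wide two's-complement field, mirroring the wrap/diff logic.
    std::int32_t SignExtendField(std::uint32_t raw, std::uint32_t bits) {
        const std::int32_t wrap_value = 1 << (bits - 1);
        const std::int32_t diff_value = 1 << bits;
        const std::int32_t value = static_cast<std::int32_t>(raw & ((1u << bits) - 1));
        return value >= wrap_value ? value - diff_value : value;
    }

    int main() {
        std::cout << SignExtendField(0b0111, 4) << '\n'    //  7
                  << SignExtendField(0b1000, 4) << '\n'    // -8
                  << SignExtendField(0b111111, 6) << '\n'; // -1
    }
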
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4d2f4d6a8..075c7d07c 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,6 +162,8 @@ enum class OperationCode {
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
+ UAtomicAdd, /// (smem, uint) -> uint
+
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
@@ -392,8 +394,30 @@ struct MetaImage {
using Meta =
std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
+class AmendNode {
+public:
+ std::optional<std::size_t> GetAmendIndex() const {
+ if (amend_index == amend_null_index) {
+ return std::nullopt;
+ }
+ return {amend_index};
+ }
+
+ void SetAmendIndex(std::size_t index) {
+ amend_index = index;
+ }
+
+ void ClearAmend() {
+ amend_index = amend_null_index;
+ }
+
+private:
+ static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
+ std::size_t amend_index{amend_null_index};
+};
+
/// Holds any kind of operation that can be done in the IR
-class OperationNode final {
+class OperationNode final : public AmendNode {
public:
explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
@@ -433,7 +457,7 @@ private:
};
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
-class ConditionalNode final {
+class ConditionalNode final : public AmendNode {
public:
explicit ConditionalNode(Node condition, std::vector<Node>&& code)
: condition{std::move(condition)}, code{std::move(code)} {}
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 1d9825c76..31eecb3f4 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
Immediate(bits));
}
+std::size_t ShaderIR::DeclareAmend(Node new_amend) {
+ const std::size_t id = amend_code.size();
+ amend_code.push_back(new_amend);
+ return id;
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index baed06ccd..ba1db4c11 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -176,6 +176,10 @@ public:
/// Returns a condition code evaluated from internal flags
Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
+ const Node& GetAmendNode(std::size_t index) const {
+ return amend_code[index];
+ }
+
private:
friend class ASTDecoder;
@@ -390,7 +394,10 @@ private:
std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
Tegra::Shader::Instruction instr,
- bool is_write);
+ bool is_read, bool is_write);
+
+ /// Register new amending code and obtain the reference id.
+ std::size_t DeclareAmend(Node new_amend);
const ProgramCode& program_code;
const u32 main_offset;
@@ -406,6 +413,7 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
ASTManager program_manager{true, true};
+ std::vector<Node> amend_code;
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 271e67533..81fb9f633 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -95,7 +95,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
{TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
{TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
{TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
- {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8},
+ {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
{TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
{TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 992b5c022..9256fd6d9 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -209,6 +209,11 @@ public:
return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
}
+ /// Returns the number of layers in the surface.
+ std::size_t GetNumLayers() const {
+ return is_layered ? depth : 1;
+ }
+
/// Returns the debug name of the texture for use in graphic debuggers.
std::string TargetName() const;
@@ -287,10 +292,6 @@ private:
/// Returns the size of a layer
std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
- std::size_t GetNumLayers() const {
- return is_layered ? depth : 1;
- }
-
/// Returns true if these parameters are from a layered surface.
bool IsLayered() const;
};
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 07a720494..7490fb718 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -215,18 +215,11 @@ void GRenderWindow::moveContext() {
}
void GRenderWindow::SwapBuffers() {
- // In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`,
- // since we never call `doneCurrent` in this thread.
- // However:
- // - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
- // since the last time `swapBuffers` was executed;
- // - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
- context->makeCurrent(child);
-
context->swapBuffers(child);
+
if (!first_frame) {
- emit FirstFrameDisplayed();
first_frame = true;
+ emit FirstFrameDisplayed();
}
}
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp
index daedbc33e..e43e84d39 100644
--- a/src/yuzu/configuration/configure_gamelist.cpp
+++ b/src/yuzu/configuration/configure_gamelist.cpp
@@ -21,10 +21,8 @@ constexpr std::array default_icon_sizes{
};
constexpr std::array row_text_names{
- QT_TR_NOOP("Filename"),
- QT_TR_NOOP("Filetype"),
- QT_TR_NOOP("Title ID"),
- QT_TR_NOOP("Title Name"),
+ QT_TR_NOOP("Filename"), QT_TR_NOOP("Filetype"), QT_TR_NOOP("Title ID"),
+ QT_TR_NOOP("Title Name"), QT_TR_NOOP("None"),
};
} // Anonymous namespace
@@ -46,6 +44,12 @@ ConfigureGameList::ConfigureGameList(QWidget* parent)
&ConfigureGameList::RequestGameListUpdate);
connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
&ConfigureGameList::RequestGameListUpdate);
+
+ // Update text ComboBoxes after user interaction.
+ connect(ui->row_1_text_combobox, QOverload<int>::of(&QComboBox::activated),
+ [=]() { ConfigureGameList::UpdateSecondRowComboBox(); });
+ connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::activated),
+ [=]() { ConfigureGameList::UpdateFirstRowComboBox(); });
}
ConfigureGameList::~ConfigureGameList() = default;
@@ -68,10 +72,6 @@ void ConfigureGameList::SetConfiguration() {
ui->show_add_ons->setChecked(UISettings::values.show_add_ons);
ui->icon_size_combobox->setCurrentIndex(
ui->icon_size_combobox->findData(UISettings::values.icon_size));
- ui->row_1_text_combobox->setCurrentIndex(
- ui->row_1_text_combobox->findData(UISettings::values.row_1_text_id));
- ui->row_2_text_combobox->setCurrentIndex(
- ui->row_2_text_combobox->findData(UISettings::values.row_2_text_id));
}
void ConfigureGameList::changeEvent(QEvent* event) {
@@ -104,10 +104,43 @@ void ConfigureGameList::InitializeIconSizeComboBox() {
}
void ConfigureGameList::InitializeRowComboBoxes() {
- for (std::size_t i = 0; i < row_text_names.size(); ++i) {
- const QString row_text_name = QString::fromUtf8(row_text_names[i]);
+ UpdateFirstRowComboBox(true);
+ UpdateSecondRowComboBox(true);
+}
+
+void ConfigureGameList::UpdateFirstRowComboBox(bool init) {
+ const int currentIndex =
+ init ? UISettings::values.row_1_text_id
+ : ui->row_1_text_combobox->findData(ui->row_1_text_combobox->currentData());
+ ui->row_1_text_combobox->clear();
+
+ for (std::size_t i = 0; i < row_text_names.size(); i++) {
+ const QString row_text_name = QString::fromUtf8(row_text_names[i]);
ui->row_1_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
+ }
+
+ ui->row_1_text_combobox->setCurrentIndex(ui->row_1_text_combobox->findData(currentIndex));
+
+ ui->row_1_text_combobox->removeItem(4); // None
+ ui->row_1_text_combobox->removeItem(
+ ui->row_1_text_combobox->findData(ui->row_2_text_combobox->currentData()));
+}
+
+void ConfigureGameList::UpdateSecondRowComboBox(bool init) {
+ const int currentIndex =
+ init ? UISettings::values.row_2_text_id
+ : ui->row_2_text_combobox->findData(ui->row_2_text_combobox->currentData());
+
+ ui->row_2_text_combobox->clear();
+
+ for (std::size_t i = 0; i < row_text_names.size(); ++i) {
+ const QString row_text_name = QString::fromUtf8(row_text_names[i]);
ui->row_2_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
}
+
+ ui->row_2_text_combobox->setCurrentIndex(ui->row_2_text_combobox->findData(currentIndex));
+
+ ui->row_2_text_combobox->removeItem(
+ ui->row_2_text_combobox->findData(ui->row_1_text_combobox->currentData()));
}
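The two Update*RowComboBox helpers above keep the row selectors mutually exclusive: each box drops whatever entry the other box currently shows, and only the second row keeps the new "None" option. A condensed sketch of that invariant with an invented helper name, assuming the Qt widgets shown in the patch (this is not code from the patch itself):

    #include <QComboBox>

    // Remove from 'box' the entry currently selected in 'other', if present.
    void RemoveOtherRowsSelection(QComboBox* box, const QComboBox* other) {
        const int index = box->findData(other->currentData());
        if (index != -1) {
            box->removeItem(index);
        }
    }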
diff --git a/src/yuzu/configuration/configure_gamelist.h b/src/yuzu/configuration/configure_gamelist.h
index e11822919..ecd3fa174 100644
--- a/src/yuzu/configuration/configure_gamelist.h
+++ b/src/yuzu/configuration/configure_gamelist.h
@@ -31,5 +31,8 @@ private:
void InitializeIconSizeComboBox();
void InitializeRowComboBoxes();
+ void UpdateFirstRowComboBox(bool init = false);
+ void UpdateSecondRowComboBox(bool init = false);
+
std::unique_ptr<Ui::ConfigureGameList> ui;
};
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp
index 3ea0b8d67..fa9052136 100644
--- a/src/yuzu/configuration/configure_hotkeys.cpp
+++ b/src/yuzu/configuration/configure_hotkeys.cpp
@@ -48,6 +48,7 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) {
}
ui->hotkey_list->expandAll();
+ ui->hotkey_list->resizeColumnToContents(0);
}
void ConfigureHotkeys::changeEvent(QEvent* event) {
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 1c2b37afd..7cde72d1b 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -108,11 +108,14 @@ public:
}};
const auto& row1 = row_data.at(UISettings::values.row_1_text_id);
- const auto& row2 = row_data.at(UISettings::values.row_2_text_id);
+ const int row2_id = UISettings::values.row_2_text_id;
- if (row1.isEmpty() || row1 == row2)
- return row2;
- if (row2.isEmpty())
+ if (row2_id == 4) // None
+ return row1;
+
+ const auto& row2 = row_data.at(row2_id);
+
+ if (row1 == row2)
return row1;
return QString(row1 + QStringLiteral("\n ") + row2);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index b21fbf826..b5dd3e0d6 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -526,19 +526,30 @@ void GMainWindow::InitializeHotkeys() {
const QString main_window = QStringLiteral("Main Window");
const QString load_file = QStringLiteral("Load File");
+ const QString load_amiibo = QStringLiteral("Load Amiibo");
const QString exit_yuzu = QStringLiteral("Exit yuzu");
+ const QString restart_emulation = QStringLiteral("Restart Emulation");
const QString stop_emulation = QStringLiteral("Stop Emulation");
const QString toggle_filter_bar = QStringLiteral("Toggle Filter Bar");
const QString toggle_status_bar = QStringLiteral("Toggle Status Bar");
const QString fullscreen = QStringLiteral("Fullscreen");
+ const QString capture_screenshot = QStringLiteral("Capture Screenshot");
ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence(main_window, load_file));
ui.action_Load_File->setShortcutContext(
hotkey_registry.GetShortcutContext(main_window, load_file));
+ ui.action_Load_Amiibo->setShortcut(hotkey_registry.GetKeySequence(main_window, load_amiibo));
+ ui.action_Load_Amiibo->setShortcutContext(
+ hotkey_registry.GetShortcutContext(main_window, load_amiibo));
+
ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence(main_window, exit_yuzu));
ui.action_Exit->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, exit_yuzu));
+ ui.action_Restart->setShortcut(hotkey_registry.GetKeySequence(main_window, restart_emulation));
+ ui.action_Restart->setShortcutContext(
+ hotkey_registry.GetShortcutContext(main_window, restart_emulation));
+
ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence(main_window, stop_emulation));
ui.action_Stop->setShortcutContext(
hotkey_registry.GetShortcutContext(main_window, stop_emulation));
@@ -553,6 +564,11 @@ void GMainWindow::InitializeHotkeys() {
ui.action_Show_Status_Bar->setShortcutContext(
hotkey_registry.GetShortcutContext(main_window, toggle_status_bar));
+ ui.action_Capture_Screenshot->setShortcut(
+ hotkey_registry.GetKeySequence(main_window, capture_screenshot));
+ ui.action_Capture_Screenshot->setShortcutContext(
+ hotkey_registry.GetShortcutContext(main_window, capture_screenshot));
+
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this),
&QShortcut::activated, this, &GMainWindow::OnMenuLoadFile);
connect(
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 21f422500..a2c9e4547 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -15,7 +15,7 @@
</property>
<property name="windowIcon">
<iconset>
- <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset>
+ <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>
</property>
<property name="tabShape">
<enum>QTabWidget::Rounded</enum>
@@ -98,6 +98,7 @@
<addaction name="action_Display_Dock_Widget_Headers"/>
<addaction name="action_Show_Filter_Bar"/>
<addaction name="action_Show_Status_Bar"/>
+ <addaction name="separator"/>
<addaction name="menu_View_Debugging"/>
</widget>
<widget class="QMenu" name="menu_Tools">