46 files changed, 1900 insertions, 315 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6036d6ed3..dac992d44 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
     engines/shader_header.h
     gpu.cpp
     gpu.h
+    gpu_asynch.cpp
+    gpu_asynch.h
+    gpu_synch.cpp
+    gpu_synch.h
+    gpu_thread.cpp
+    gpu_thread.h
     macro_interpreter.cpp
     macro_interpreter.h
     memory_manager.cpp
@@ -94,6 +100,8 @@ add_library(video_core STATIC
     surface.h
     textures/astc.cpp
     textures/astc.h
+    textures/convert.cpp
+    textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
     textures/texture.h
@@ -104,6 +112,10 @@ add_library(video_core STATIC
 if (ENABLE_VULKAN)
     target_sources(video_core PRIVATE
         renderer_vulkan/declarations.h
+        renderer_vulkan/maxwell_to_vk.cpp
+        renderer_vulkan/maxwell_to_vk.h
+        renderer_vulkan/vk_buffer_cache.cpp
+        renderer_vulkan/vk_buffer_cache.h
         renderer_vulkan/vk_device.cpp
         renderer_vulkan/vk_device.h
         renderer_vulkan/vk_memory_manager.cpp
@@ -111,7 +123,9 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_resource_manager.cpp
         renderer_vulkan/vk_resource_manager.h
         renderer_vulkan/vk_scheduler.cpp
-        renderer_vulkan/vk_scheduler.h)
+        renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_stream_buffer.cpp
+        renderer_vulkan/vk_stream_buffer.h)
 
     target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 669541b4b..bff1a37ff 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -39,7 +39,7 @@ bool DmaPusher::Step() {
     }
 
     const CommandList& command_list{dma_pushbuffer.front()};
-    const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
+    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
     GPUVAddr dma_get = command_list_header.addr;
     GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
     bool non_main = command_list_header.is_non_main;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 540dcc52c..03b7ee5d8 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/core.h"
-#include "core/memory.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
 #include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/textures/decoders.h"
 
 namespace Tegra::Engines {
 
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index c69f74cc5..80523e320 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
 #pragma once
 
 #include <array>
-#include "common/assert.h"
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4ca856b6b..b1d950460 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,9 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/logging/log.h"
-#include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/memory_manager.h"
 
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index df0a32e0f..6575afd0f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,8 +5,7 @@
 #pragma once
 
 #include <array>
-#include "common/assert.h"
-#include "common/bit_field.h"
+#include <cstddef>
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4f6126116..aae2a4019 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
     // We do this before actually writing the new data because the destination address might contain
     // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
+    Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
 
     Memory::Write32(*dest_address, data);
     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f680c2ad9..9181e9d80 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2d2136067..144e7fa82 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
     auto debug_context = system.GetGPUDebugContext();
 
+    const u32 method = method_call.method;
+
     // It is an error to write to a register other than the current macro's ARG register before it
     // has finished execution.
     if (executing_macro != 0) {
-        ASSERT(method_call.method == executing_macro + 1);
+        ASSERT(method == executing_macro + 1);
     }
 
     // Methods after 0xE00 are special, they're actually triggers for some microcode that was
     // uploaded to the GPU during initialization.
-    if (method_call.method >= MacroRegistersStart) {
+    if (method >= MacroRegistersStart) {
         // We're trying to execute a macro
         if (executing_macro == 0) {
             // A macro call must begin by writing the macro method's register, not its argument.
-            ASSERT_MSG((method_call.method % 2) == 0,
+            ASSERT_MSG((method % 2) == 0,
                        "Can't start macro execution by writing to the ARGS register");
-            executing_macro = method_call.method;
+            executing_macro = method;
         }
 
         macro_params.push_back(method_call.argument);
@@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         return;
     }
 
-    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+    ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid Maxwell3D register, increase the size of the Regs structure");
 
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
     }
 
-    if (regs.reg_array[method_call.method] != method_call.argument) {
-        regs.reg_array[method_call.method] = method_call.argument;
+    if (regs.reg_array[method] != method_call.argument) {
+        regs.reg_array[method] = method_call.argument;
         // Color buffers
         constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
         constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method_call.method >= first_rt_reg &&
-            method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
+        if (method >= first_rt_reg &&
+            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
+            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
+            dirty_flags.color_buffer.set(rt_index);
         }
 
         // Zeta buffer
         constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
+        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
+            method == MAXWELL3D_REG_INDEX(zeta_width) ||
+            method == MAXWELL3D_REG_INDEX(zeta_height) ||
+            (method >= MAXWELL3D_REG_INDEX(zeta) &&
+             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
             dirty_flags.zeta_buffer = true;
         }
 
         // Shader
         constexpr u32 shader_registers_count =
             sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
+        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
+            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
             dirty_flags.shaders = true;
         }
 
         // Vertex format
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method_call.method <
-                MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
             dirty_flags.vertex_attrib_format = true;
         }
 
         // Vertex buffer
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
-            dirty_flags.vertex_array |=
-                1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
         }
     }
 
-    switch (method_call.method) {
+    switch (method) {
     case MAXWELL3D_REG_INDEX(macros.data): {
         ProcessMacroUpload(method_call.argument);
         break;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 584f51c48..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
 #pragma once
 
 #include <array>
+#include <bitset>
 #include <unordered_map>
 #include <vector>
+
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
@@ -1094,19 +1096,18 @@ public:
     MemoryManager& memory_manager;
 
     struct DirtyFlags {
-        u8 color_buffer = 0xFF;
-        bool zeta_buffer = true;
-
-        bool shaders = true;
+        std::bitset<8> color_buffer{0xFF};
+        std::bitset<32> vertex_array{0xFFFFFFFF};
 
         bool vertex_attrib_format = true;
-        u32 vertex_array = 0xFFFFFFFF;
+        bool zeta_buffer = true;
+        bool shaders = true;
 
         void OnMemoryWrite() {
-            color_buffer = 0xFF;
             zeta_buffer = true;
             shaders = true;
-            vertex_array = 0xFFFFFFFF;
+            color_buffer.set();
+            vertex_array.set();
         }
     };
 
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 529a14ec7..9dfea5999 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "common/assert.h"
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -91,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
-        rasterizer.FlushRegion(*source_cpu, src_size);
+        Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
 
         // We have to invalidate the destination region to evict any outdated surfaces from the
         // cache. We do this before actually writing the new data because the destination address
         // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(*dest_cpu, dst_size);
+        Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
     };
 
     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index cf75aeb12..34c369320 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 252592edd..c7eb15b6a 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@
 
 #include <bitset>
 #include <optional>
-#include <string>
 #include <tuple>
 #include <vector>
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ac30d1a89..08abf8ac9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
 
 namespace Tegra {
 
@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
     UNREACHABLE();
 }
 
-GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
+GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
+    auto& rasterizer{renderer.Rasterizer()};
     memory_manager = std::make_unique<Tegra::MemoryManager>();
     dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
     maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 6313702f2..56a203275 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -16,8 +16,8 @@ class System;
 }
 
 namespace VideoCore {
-class RasterizerInterface;
-}
+class RendererBase;
+} // namespace VideoCore
 
 namespace Tegra {
 
@@ -119,10 +119,11 @@ enum class EngineID {
     MAXWELL_DMA_COPY_A = 0xB0B5,
 };
 
-class GPU final {
+class GPU {
 public:
-    explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
-    ~GPU();
+    explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
+
+    virtual ~GPU();
 
     struct MethodCall {
         u32 method{};
@@ -200,8 +201,42 @@ public:
         };
     } regs{};
 
+    /// Push GPU command entries to be processed
+    virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+
+    /// Swap buffers (render frame)
+    virtual void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+
 private:
+    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessSemaphoreTriggerMethod();
+    void ProcessSemaphoreRelease();
+    void ProcessSemaphoreAcquire();
+
+    /// Calls a GPU puller method.
+    void CallPullerMethod(const MethodCall& method_call);
+
+    /// Calls a GPU engine method.
+    void CallEngineMethod(const MethodCall& method_call);
+
+    /// Determines where the method should be executed.
+    bool ExecuteMethodOnEngine(const MethodCall& method_call);
+
+protected:
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    VideoCore::RendererBase& renderer;
+
+private:
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
 
     /// Mapping of command subchannels to their bound engine ids.
@@ -217,18 +252,6 @@ private:
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
-
-    void ProcessBindMethod(const MethodCall& method_call);
-    void ProcessSemaphoreTriggerMethod();
-    void ProcessSemaphoreRelease();
-    void ProcessSemaphoreAcquire();
-
-    // Calls a GPU puller method.
-    void CallPullerMethod(const MethodCall& method_call);
-    // Calls a GPU engine method.
-    void CallEngineMethod(const MethodCall& method_call);
-    // Determines where the method should be executed.
-    bool ExecuteMethodOnEngine(const MethodCall& method_call);
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..ad0a747e3
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
+    : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
+
+GPUAsynch::~GPUAsynch() = default;
+
+void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
+    gpu_thread.SubmitList(std::move(entries));
+}
+
+void GPUAsynch::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    gpu_thread.SwapBuffers(std::move(framebuffer));
+}
+
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
+    gpu_thread.FlushRegion(addr, size);
+}
+
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
+    gpu_thread.InvalidateRegion(addr, size);
+}
+
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    gpu_thread.FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..e6a807aba
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+namespace GPUThread {
+class ThreadManager;
+} // namespace GPUThread
+
+/// Implementation of GPU interface that runs the GPU asynchronously
+class GPUAsynch : public Tegra::GPU {
+public:
+    explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
+    ~GPUAsynch() override;
+
+    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+
+private:
+    GPUThread::ThreadManager gpu_thread;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..4c00b96c7
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_synch.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
+    : Tegra::GPU(system, renderer) {}
+
+GPUSynch::~GPUSynch() = default;
+
+void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
+    dma_pusher->Push(std::move(entries));
+    dma_pusher->DispatchCalls();
+}
+
+void GPUSynch::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    renderer.SwapBuffers(std::move(framebuffer));
+}
+
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().FlushRegion(addr, size);
+}
+
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().InvalidateRegion(addr, size);
+}
+
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..7d5a241ff
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+/// Implementation of GPU interface that runs the GPU synchronously
+class GPUSynch : public Tegra::GPU {
+public:
+    explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
+    ~GPUSynch() override;
+
+    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..c5bdd2a17
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,152 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/microprofile.h"
+#include "core/frontend/scope_acquire_window_context.h"
+#include "core/settings.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon::GPUThread {
+
+/// Executes a single GPU thread command
+static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
+                           Tegra::DmaPusher& dma_pusher) {
+    if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
+        dma_pusher.Push(std::move(submit_list->entries));
+        dma_pusher.DispatchCalls();
+    } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
+        renderer.SwapBuffers(data->framebuffer);
+    } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
+        renderer.Rasterizer().FlushRegion(data->addr, data->size);
+    } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
+        renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+    } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
+        renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
+    } else {
+        UNREACHABLE();
+    }
+}
+
+/// Runs the GPU thread
+static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
+
+    MicroProfileOnThreadCreate("GpuThread");
+
+    auto WaitForWakeup = [&]() {
+        std::unique_lock<std::mutex> lock{state.signal_mutex};
+        state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
+    };
+
+    // Wait for first GPU command before acquiring the window context
+    WaitForWakeup();
+
+    // If emulation was stopped during disk shader loading, abort before trying to acquire context
+    if (!state.is_running) {
+        return;
+    }
+
+    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+
+    while (state.is_running) {
+        if (!state.is_running) {
+            return;
+        }
+
+        {
+            // Thread has been woken up, so make the previous write queue the next read queue
+            std::lock_guard<std::mutex> lock{state.signal_mutex};
+            std::swap(state.push_queue, state.pop_queue);
+        }
+
+        // Execute all of the GPU commands
+        while (!state.pop_queue->empty()) {
+            ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
+            state.pop_queue->pop();
+        }
+
+        state.UpdateIdleState();
+
+        // Signal that the GPU thread has finished processing commands
+        if (state.is_idle) {
+            state.idle_condition.notify_one();
+        }
+
+        // Wait for CPU thread to send more GPU commands
+        WaitForWakeup();
+    }
+}
+
+ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
+    : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
+                                                         std::ref(dma_pusher), std::ref(state)},
+      thread_id{thread.get_id()} {}
+
+ThreadManager::~ThreadManager() {
+    {
+        // Notify GPU thread that a shutdown is pending
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+        state.is_running = false;
+    }
+
+    state.signal_condition.notify_one();
+    thread.join();
+}
+
+void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
+    if (entries.empty()) {
+        return;
+    }
+
+    PushCommand(SubmitListCommand(std::move(entries)), false, false);
+}
+
+void ThreadManager::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
+}
+
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
+    // Block the CPU when using accurate emulation
+    PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
+}
+
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
+    PushCommand(InvalidateRegionCommand(addr, size), true, true);
+}
+
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    InvalidateRegion(addr, size);
+}
+
+void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
+    {
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+
+        if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
+            // Execute the command synchronously on the current thread
+            ExecuteCommand(&command_data, renderer, dma_pusher);
+            return;
+        }
+
+        // Push the command to the GPU thread
+        state.UpdateIdleState();
+        state.push_queue->emplace(command_data);
+    }
+
+    // Signal the GPU thread that commands are pending
+    state.signal_condition.notify_one();
+
+    if (wait_for_idle) {
+        // Wait for the GPU to be idle (all commands to be executed)
+        std::unique_lock<std::mutex> lock{state.idle_mutex};
+        state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
+    }
+}
+
+} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..edb148b14
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,133 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <variant>
+
+namespace Tegra {
+struct FramebufferConfig;
+class DmaPusher;
+} // namespace Tegra
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon::GPUThread {
+
+/// Command to signal to the GPU thread that a command list is ready for processing
+struct SubmitListCommand final {
+    explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
+
+    Tegra::CommandList entries;
+};
+
+/// Command to signal to the GPU thread that a swap buffers is pending
+struct SwapBuffersCommand final {
+    explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
+        : framebuffer{std::move(framebuffer)} {}
+
+    std::optional<const Tegra::FramebufferConfig> framebuffer;
+};
+
+/// Command to signal to the GPU thread to flush a region
+struct FlushRegionCommand final {
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+/// Command to signal to the GPU thread to invalidate a region
+struct InvalidateRegionCommand final {
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+/// Command to signal to the GPU thread to flush and invalidate a region
+struct FlushAndInvalidateRegionCommand final {
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+        : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+/// Struct used to synchronize the GPU thread
+struct SynchState final {
+    std::atomic<bool> is_running{true};
+    std::atomic<bool> is_idle{true};
+    std::condition_variable signal_condition;
+    std::mutex signal_mutex;
+    std::condition_variable idle_condition;
+    std::mutex idle_mutex;
+
+    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
+    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
+    // empty. This allows for efficient thread-safe access, as it does not require any copies.
+
+    using CommandQueue = std::queue<CommandData>;
+    std::array<CommandQueue, 2> command_queues;
+    CommandQueue* push_queue{&command_queues[0]};
+    CommandQueue* pop_queue{&command_queues[1]};
+
+    void UpdateIdleState() {
+        std::lock_guard<std::mutex> lock{idle_mutex};
+        is_idle = command_queues[0].empty() && command_queues[1].empty();
+    }
+};
+
+/// Class used to manage the GPU thread
+class ThreadManager final {
+public:
+    explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+    ~ThreadManager();
+
+    /// Push GPU command entries to be processed
+    void SubmitList(Tegra::CommandList&& entries);
+
+    /// Swap buffers (render frame)
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+
+private:
+    /// Pushes a command to be executed by the GPU thread
+    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
+
+    /// Returns true if this is called by the GPU thread
+    bool IsGpuThread() const {
+        return std::this_thread::get_id() == thread_id;
+    }
+
+private:
+    SynchState state;
+    VideoCore::RendererBase& renderer;
+    Tegra::DmaPusher& dma_pusher;
+    std::thread thread;
+    std::thread::id thread_id;
+};
+
+} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..a7bcf26fb 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -129,6 +129,15 @@ protected:
         return ++modified_ticks;
     }
 
+    /// Flushes the specified object, updating appropriate cache state as needed
+    void FlushObject(const T& object) {
+        if (!object->IsDirty()) {
+            return;
+        }
+        object->Flush();
+        object->MarkAsModified(false, *this);
+    }
+
 private:
     /// Returns a list of cached objects from the specified memory region, ordered by access time
     std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
         return objects;
     }
 
-    /// Flushes the specified object, updating appropriate cache state as needed
-    void FlushObject(const T& object) {
-        if (!object->IsDirty()) {
-            return;
-        }
-        object->Flush();
-        object->MarkAsModified(false, *this);
-    }
-
     using ObjectSet = std::set<T>;
     using ObjectCache = std::unordered_map<VAddr, T>;
     using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 94223f45f..919d1f2d4 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/logging/log.h"
 #include "core/frontend/emu_window.h"
 #include "core/settings.h"
 #include "video_core/renderer_base.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 406d720a0..301562ff6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
                                    ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
+    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
+      screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
     // Create sampler objects
     for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
@@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
 
     glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
 
-    LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
+    LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
     CheckExtensions();
 }
 
@@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
                 continue;
 
             const auto& buffer = regs.vertex_array[attrib.buffer];
-            LOG_TRACE(HW_GPU,
+            LOG_TRACE(Render_OpenGL,
                       "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
                       index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
                       attrib.offset.Value(), attrib.IsNormalized());
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     }
 
     // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+    gpu.dirty_flags.vertex_array.set();
 
     state.draw.vertex_array = vao_entry.handle;
     return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
     auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
-    if (!gpu.dirty_flags.vertex_array)
+    if (gpu.dirty_flags.vertex_array.none())
         return;
 
     MICROPROFILE_SCOPE(OpenGL_VB);
 
     // Upload all guest vertex arrays sequentially to our buffer
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (~gpu.dirty_flags.vertex_array & (1u << index))
+        if (!gpu.dirty_flags.vertex_array[index])
             continue;
 
         const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
         }
     }
 
-    gpu.dirty_flags.vertex_array = 0;
+    gpu.dirty_flags.vertex_array.reset();
 }
 
 DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             shader_program_manager->UseProgrammableFragmentShader(program_handle);
             break;
         default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
-                         shader_config.enable.Value(), shader_config.offset);
-            UNREACHABLE();
+            UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
+                              shader_config.enable.Value(), shader_config.offset);
         }
 
         const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -488,13 +487,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
     OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
     std::optional<std::size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
     const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                  single_color_target};
-    if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
-        !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state &&
+        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
         // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
         // single color targets). This is done because the guest registers may not change but the
         // host framebuffer may contain different attachments
@@ -721,10 +720,10 @@ void RasterizerOpenGL::DrawArrays() {
     // Add space for at least 18 constant buffers
     buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
 
-    bool invalidate = buffer_cache.Map(buffer_size);
+    const bool invalidate = buffer_cache.Map(buffer_size);
     if (invalidate) {
         // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+        gpu.dirty_flags.vertex_array.set();
     }
 
     const GLuint vao = SetupVertexFormat();
@@ -738,30 +737,18 @@ void RasterizerOpenGL::DrawArrays() {
     shader_program_manager->ApplyTo(state);
     state.Apply();
 
-    // Execute draw call
+    res_cache.SignalPreDrawCall();
     params.DispatchDraw();
-
-    // Disable scissor test
-    state.viewports[0].scissor.enabled = false;
+    res_cache.SignalPostDrawCall();
 
     accelerate_draw = AccelDraw::Disabled;
-
-    // Unbind textures for potential future use as framebuffer attachments
-    for (auto& texture_unit : state.texture_units) {
-        texture_unit.Unbind();
-    }
-    state.Apply();
 }
 
 void RasterizerOpenGL::FlushAll() {}
 
 void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-
-    if (Settings::values.use_accurate_gpu_emulation) {
-        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
-        res_cache.FlushRegion(addr, size);
-    }
+    res_cache.FlushRegion(addr, size);
 }
 
 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -805,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
         VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
     ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
     ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
-    ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
+
+    if (params.pixel_format != pixel_format) {
+        LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
+    }
 
     screen_info.display_texture = surface->Texture().handle;
 
@@ -951,8 +941,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
             size = buffer.size;
 
             if (size > MaxConstbufferSize) {
-                LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
-                             MaxConstbufferSize);
+                LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
+                            MaxConstbufferSize);
                 size = MaxConstbufferSize;
             }
         } else {
@@ -1246,11 +1236,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
 
 void RasterizerOpenGL::SyncTransformFeedback() {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
-    if (regs.tfb_enabled != 0) {
-        LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
-        UNREACHABLE();
-    }
+    UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
 }
 
 void RasterizerOpenGL::SyncPointState() {
@@ -1270,12 +1256,8 @@ void RasterizerOpenGL::SyncPolygonOffset() {
 
 void RasterizerOpenGL::CheckAlphaTests() {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
-    if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) {
-        LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
-                                    "this behavior is undefined.");
-        UNREACHABLE();
-    }
+    UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
+                         "Alpha Testing is enabled with more than one rendertarget");
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 837523b82..e9eb6e921 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <optional>
 #include <glad/glad.h>
 
 #include "common/alignment.h"
@@ -20,7 +21,7 @@
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/utils.h"
 #include "video_core/surface.h"
-#include "video_core/textures/astc.h"
+#include "video_core/textures/convert.h"
 #include "video_core/textures/decoders.h"
 
 namespace OpenGL {
@@ -570,6 +571,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
     // alternatives. This signals a bug on those functions.
     const auto width = static_cast<GLsizei>(params.MipWidth(0));
     const auto height = static_cast<GLsizei>(params.MipHeight(0));
+    memory_size = params.MemorySize();
+    reinterpreted = false;
 
     const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
     gl_internal_format = format_tuple.internal_format;
@@ -615,103 +618,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
     }
 }
 
-static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
-    union S8Z24 {
-        BitField<0, 24, u32> z24;
-        BitField<24, 8, u32> s8;
-    };
-    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
-
-    union Z24S8 {
-        BitField<0, 8, u32> s8;
-        BitField<8, 24, u32> z24;
-    };
-    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
-
-    S8Z24 s8z24_pixel{};
-    Z24S8 z24s8_pixel{};
-    constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
-    for (std::size_t y = 0; y < height; ++y) {
-        for (std::size_t x = 0; x < width; ++x) {
-            const std::size_t offset{bpp * (y * width + x)};
-            if (reverse) {
-                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
-                s8z24_pixel.s8.Assign(z24s8_pixel.s8);
-                s8z24_pixel.z24.Assign(z24s8_pixel.z24);
-                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
-            } else {
-                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
-                z24s8_pixel.s8.Assign(s8z24_pixel.s8);
-                z24s8_pixel.z24.Assign(s8z24_pixel.z24);
-                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
-            }
-        }
-    }
-}
-
-/**
- * Helper function to perform software conversion (as needed) when loading a buffer from Switch
- * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
- * typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
-                                               u32 width, u32 height, u32 depth) {
-    switch (pixel_format) {
-    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::ASTC_2D_8X8:
-    case PixelFormat::ASTC_2D_8X5:
-    case PixelFormat::ASTC_2D_5X4:
-    case PixelFormat::ASTC_2D_5X5:
-    case PixelFormat::ASTC_2D_4X4_SRGB:
-    case PixelFormat::ASTC_2D_8X8_SRGB:
-    case PixelFormat::ASTC_2D_8X5_SRGB:
-    case PixelFormat::ASTC_2D_5X4_SRGB:
-    case PixelFormat::ASTC_2D_5X5_SRGB:
-    case PixelFormat::ASTC_2D_10X8:
-    case PixelFormat::ASTC_2D_10X8_SRGB: {
-        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
-        u32 block_width{};
-        u32 block_height{};
-        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
-        data =
-            Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
-        break;
-    }
-    case PixelFormat::S8Z24:
-        // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
-        ConvertS8Z24ToZ24S8(data, width, height, false);
-        break;
-    }
-}
-
-/**
- * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
- * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
- * with typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
-                                                u32 width, u32 height) {
-    switch (pixel_format) {
-    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::ASTC_2D_8X8:
-    case PixelFormat::ASTC_2D_4X4_SRGB:
-    case PixelFormat::ASTC_2D_8X8_SRGB:
-    case PixelFormat::ASTC_2D_5X5:
-    case PixelFormat::ASTC_2D_5X5_SRGB:
-    case PixelFormat::ASTC_2D_10X8:
-    case PixelFormat::ASTC_2D_10X8_SRGB: {
-        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
-                     static_cast<u32>(pixel_format));
-        UNREACHABLE();
-        break;
-    }
-    case PixelFormat::S8Z24:
-        // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
-        ConvertS8Z24ToZ24S8(data, width, height, true);
-        break;
-    }
-}
-
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
 void CachedSurface::LoadGLBuffer() {
     MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
@@ -740,8 +646,16 @@ void CachedSurface::LoadGLBuffer() {
         }
     }
     for (u32 i = 0; i < params.max_mip_level; i++) {
-        ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
-                                           params.MipHeight(i), params.MipDepth(i));
+        const u32 width = params.MipWidth(i);
+        const u32 height = params.MipHeight(i);
+        const u32 depth = params.MipDepth(i);
+        if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
+            // Reserve size for RGBA8 conversion
+            constexpr std::size_t rgba_bpp = 4;
+            gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
+        }
+        Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
+                                               height, depth, true, true);
     }
 }
 
@@ -764,8 +678,8 @@ void CachedSurface::FlushGLBuffer() {
     glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
                       static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
     glPixelStorei(GL_PACK_ROW_LENGTH, 0);
-    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
-                                        params.height);
+    Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
+                                           params.height, params.depth, true, true);
     const u8* const texture_src_data = Memory::GetPointer(params.addr);
     ASSERT(texture_src_data);
     if (params.is_tiled) {
@@ -985,30 +899,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
     auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
     const auto& regs{gpu.regs};
 
-    if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
-        return last_color_buffers[index];
+    if (!gpu.dirty_flags.color_buffer[index]) {
+        return current_color_buffers[index];
     }
-    gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
+    gpu.dirty_flags.color_buffer.reset(index);
 
     ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
 
     if (index >= regs.rt_control.count) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
     }
 
     if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
     }
 
     const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
 
-    return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
+    return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
 }
 
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
     surface->LoadGLBuffer();
     surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
     surface->MarkAsModified(false, *this);
+    surface->MarkForReload(false);
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -1020,18 +935,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
     Surface surface{TryGet(params.addr)};
     if (surface) {
         if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
-            // Use the cached surface as-is
+            // Use the cached surface as-is unless it's not synced with memory
+            if (surface->MustReload())
+                LoadSurface(surface);
             return surface;
         } else if (preserve_contents) {
             // If surface parameters changed and we care about keeping the previous data, recreate
             // the surface from the old one
             Surface new_surface{RecreateSurface(surface, params)};
-            Unregister(surface);
+            UnregisterSurface(surface);
             Register(new_surface);
+            if (new_surface->IsUploaded()) {
+                RegisterReinterpretSurface(new_surface);
+            }
             return new_surface;
         } else {
             // Delete the old surface before creating a new one to prevent collisions.
-            Unregister(surface);
+            UnregisterSurface(surface);
         }
     }
 
@@ -1313,4 +1233,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
     return {};
 }
 
+static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
+                                            u32 height) {
+    for (u32 i = 0; i < params.max_mip_level; i++) {
+        if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
+            return {i};
+        }
+    }
+    return {};
+}
+
+static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
+    const std::size_t size = params.LayerMemorySize();
+    VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+    for (u32 i = 0; i < params.depth; i++) {
+        if (start == addr) {
+            return {i};
+        }
+        start += size;
+    }
+    return {};
+}
+
+static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
+                                       const Surface blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    const std::size_t src_memory_size = src_params.size_in_bytes;
+    const std::optional<u32> level =
+        TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
+    if (level.has_value()) {
+        if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
+            src_params.height == dst_params.MipHeight(*level) &&
+            src_params.block_height >= dst_params.MipBlockHeight(*level)) {
+            const std::optional<u32> slot =
+                TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+            if (slot.has_value()) {
+                glCopyImageSubData(render_surface->Texture().handle,
+                                   SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
+                                   blitted_surface->Texture().handle,
+                                   SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
+                                   dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
+                blitted_surface->MarkAsModified(true, cache);
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
+    const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
+    const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+    if (bound2 > bound1)
+        return true;
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    return (dst_params.component_type != src_params.component_type);
+}
+
+static bool IsReinterpretInvalidSecond(const Surface render_surface,
+                                       const Surface blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    return (dst_params.height > src_params.height && dst_params.width > src_params.width);
+}
+
+bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
+                                                      Surface intersect) {
+    if (IsReinterpretInvalid(triggering_surface, intersect)) {
+        UnregisterSurface(intersect);
+        return false;
+    }
+    if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
+        if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
+            UnregisterSurface(intersect);
+            return false;
+        }
+        FlushObject(intersect);
+        FlushObject(triggering_surface);
+        intersect->MarkForReload(true);
+    }
+    return true;
+}
+
+void RasterizerCacheOpenGL::SignalPreDrawCall() {
+    if (texception && GLAD_GL_ARB_texture_barrier) {
+        glTextureBarrier();
+    }
+    texception = false;
+}
+
+void RasterizerCacheOpenGL::SignalPostDrawCall() {
+    for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
+        if (current_color_buffers[i] != nullptr) {
+            Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+            if (intersect != nullptr) {
+                PartialReinterpretSurface(current_color_buffers[i], intersect);
+                texception = true;
+            }
+        }
+    }
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 32fc6538a..9cf6f50be 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -34,6 +34,7 @@ using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
 using SurfaceType = VideoCore::Surface::SurfaceType;
 using PixelFormat = VideoCore::Surface::PixelFormat;
 using ComponentType = VideoCore::Surface::ComponentType;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct SurfaceParams {
     enum class SurfaceClass {
@@ -140,10 +141,18 @@ struct SurfaceParams {
         return offset;
     }
 
+    std::size_t GetMipmapSingleSize(u32 mip_level) const {
+        return InnerMipmapMemorySize(mip_level, false, is_layered);
+    }
+
     u32 MipWidth(u32 mip_level) const {
         return std::max(1U, width >> mip_level);
     }
 
+    u32 MipWidthGobAligned(u32 mip_level) const {
+        return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
+    }
+
     u32 MipHeight(u32 mip_level) const {
         return std::max(1U, height >> mip_level);
     }
@@ -346,6 +355,10 @@ public:
         return cached_size_in_bytes;
     }
 
+    std::size_t GetMemorySize() const {
+        return memory_size;
+    }
+
     void Flush() override {
         FlushGLBuffer();
     }
@@ -383,6 +396,26 @@ public:
                        Tegra::Texture::SwizzleSource swizzle_z,
                        Tegra::Texture::SwizzleSource swizzle_w);
 
+    void MarkReinterpreted() {
+        reinterpreted = true;
+    }
+
+    bool IsReinterpreted() const {
+        return reinterpreted;
+    }
+
+    void MarkForReload(bool reload) {
+        must_reload = reload;
+    }
+
+    bool MustReload() const {
+        return must_reload;
+    }
+
+    bool IsUploaded() const {
+        return params.identity == SurfaceParams::SurfaceClass::Uploaded;
+    }
+
 private:
     void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
 
@@ -396,6 +429,9 @@ private:
     GLenum gl_internal_format{};
     std::size_t cached_size_in_bytes{};
     std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
+    std::size_t memory_size;
+    bool reinterpreted = false;
+    bool must_reload = false;
 };
 
 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -421,6 +457,9 @@ public:
                           const Common::Rectangle<u32>& src_rect,
                           const Common::Rectangle<u32>& dst_rect);
 
+    void SignalPreDrawCall();
+    void SignalPostDrawCall();
+
 private:
     void LoadSurface(const Surface& surface);
     Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -437,6 +476,10 @@ private:
     /// Tries to get a reserved surface for the specified parameters
     Surface TryGetReservedSurface(const SurfaceParams& params);
 
+    // Partialy reinterpret a surface based on a triggering_surface that collides with it.
+    // returns true if the reinterpret was successful, false in case it was not.
+    bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
+
     /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
     void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
     void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -453,12 +496,50 @@ private:
     OGLFramebuffer read_framebuffer;
     OGLFramebuffer draw_framebuffer;
 
+    bool texception = false;
+
     /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
     /// using the new format.
     OGLBuffer copy_pbo;
 
-    std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
     Surface last_depth_buffer;
+
+    using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
+
+    static auto GetReinterpretInterval(const Surface& object) {
+        return SurfaceInterval::right_open(object->GetAddr() + 1,
+                                           object->GetAddr() + object->GetMemorySize() - 1);
+    }
+
+    // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
+    SurfaceIntervalCache reinterpreted_surfaces;
+
+    void RegisterReinterpretSurface(Surface reinterpret_surface) {
+        auto interval = GetReinterpretInterval(reinterpret_surface);
+        reinterpreted_surfaces.insert({interval, reinterpret_surface});
+        reinterpret_surface->MarkReinterpreted();
+    }
+
+    Surface CollideOnReinterpretedSurface(VAddr addr) const {
+        const SurfaceInterval interval{addr};
+        for (auto& pair :
+             boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
+            return pair.second;
+        }
+        return nullptr;
+    }
+
+    /// Unregisters an object from the cache
+    void UnregisterSurface(const Surface& object) {
+        if (object->IsReinterpreted()) {
+            auto interval = GetReinterpretInterval(object);
+            reinterpreted_surfaces.erase(interval);
+        }
+        Unregister(object);
+    }
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 219f08053..9419326a3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const {
 
     if (has_delta) {
         glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       textures.data());
+                       textures.data() + first);
     }
 }
 
@@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
     }
     if (has_delta) {
         glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       samplers.data());
+                       samplers.data() + first);
     }
 }
 
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e60b2eb44..8b510b6ae 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() {
     LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
 }
 
+void RendererOpenGL::AddTelemetryFields() {
+    const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
+    const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
+    const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
+
+    LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
+    LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
+    LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
+
+    auto& telemetry_session = system.TelemetrySession();
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+}
+
 void RendererOpenGL::CreateRasterizer() {
     if (rasterizer) {
         return;
@@ -466,17 +481,7 @@ bool RendererOpenGL::Init() {
         glDebugMessageCallback(DebugHandler, nullptr);
     }
 
-    const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
-    const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
-    const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
-
-    LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
-    LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
-    LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
-
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+    AddTelemetryFields();
 
     if (!GLAD_GL_VERSION_4_3) {
         return false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index c168fa89e..6cbf9d2cb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -60,6 +60,7 @@ public:
 
 private:
     void InitOpenGLObjects();
+    void AddTelemetryFields();
     void CreateRasterizer();
 
     void ConfigureFramebufferTexture(TextureInfo& texture,
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
new file mode 100644
index 000000000..34bf26ff2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/surface.h"
+
+namespace Vulkan::MaxwellToVK {
+
+namespace Sampler {
+
+vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
+    switch (filter) {
+    case Tegra::Texture::TextureFilter::Linear:
+        return vk::Filter::eLinear;
+    case Tegra::Texture::TextureFilter::Nearest:
+        return vk::Filter::eNearest;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
+    return {};
+}
+
+vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
+    switch (mipmap_filter) {
+    case Tegra::Texture::TextureMipmapFilter::None:
+        // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
+        // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
+        // use an image view with a single mipmap level to emulate this.
+        return vk::SamplerMipmapMode::eLinear;
+    case Tegra::Texture::TextureMipmapFilter::Linear:
+        return vk::SamplerMipmapMode::eLinear;
+    case Tegra::Texture::TextureMipmapFilter::Nearest:
+        return vk::SamplerMipmapMode::eNearest;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+    return {};
+}
+
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
+    switch (wrap_mode) {
+    case Tegra::Texture::WrapMode::Wrap:
+        return vk::SamplerAddressMode::eRepeat;
+    case Tegra::Texture::WrapMode::Mirror:
+        return vk::SamplerAddressMode::eMirroredRepeat;
+    case Tegra::Texture::WrapMode::ClampToEdge:
+        return vk::SamplerAddressMode::eClampToEdge;
+    case Tegra::Texture::WrapMode::Border:
+        return vk::SamplerAddressMode::eClampToBorder;
+    case Tegra::Texture::WrapMode::ClampOGL:
+        // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
+        // eClampToBorder to get the border color of the texture, and then sample the edge to
+        // manually mix them. However the shader part of this is not yet implemented.
+        return vk::SamplerAddressMode::eClampToBorder;
+    case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
+        return vk::SamplerAddressMode::eMirrorClampToEdge;
+    case Tegra::Texture::WrapMode::MirrorOnceBorder:
+        UNIMPLEMENTED();
+        return vk::SamplerAddressMode::eMirrorClampToEdge;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+    return {};
+}
+
+vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
+    switch (depth_compare_func) {
+    case Tegra::Texture::DepthCompareFunc::Never:
+        return vk::CompareOp::eNever;
+    case Tegra::Texture::DepthCompareFunc::Less:
+        return vk::CompareOp::eLess;
+    case Tegra::Texture::DepthCompareFunc::LessEqual:
+        return vk::CompareOp::eLessOrEqual;
+    case Tegra::Texture::DepthCompareFunc::Equal:
+        return vk::CompareOp::eEqual;
+    case Tegra::Texture::DepthCompareFunc::NotEqual:
+        return vk::CompareOp::eNotEqual;
+    case Tegra::Texture::DepthCompareFunc::Greater:
+        return vk::CompareOp::eGreater;
+    case Tegra::Texture::DepthCompareFunc::GreaterEqual:
+        return vk::CompareOp::eGreaterOrEqual;
+    case Tegra::Texture::DepthCompareFunc::Always:
+        return vk::CompareOp::eAlways;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
+                      static_cast<u32>(depth_compare_func));
+    return {};
+}
+
+} // namespace Sampler
+
+struct FormatTuple {
+    vk::Format format;            ///< Vulkan format
+    ComponentType component_type; ///< Abstracted component type
+    bool attachable;              ///< True when this format can be used as an attachment
+};
+
+static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
+    {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true},    // ABGR8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ABGR8S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ABGR8UI
+    {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false},     // B5G6R5U
+    {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // A1B5G5R5U
+    {vk::Format::eR8Unorm, ComponentType::UNorm, true},                // R8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R8UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R11FG11FB10F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA32UI
+    {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false},     // DXT1
+    {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false},         // DXT23
+    {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false},         // DXT45
+    {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false},         // DXN1
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // DXN2UNORM
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // DXN2SNORM
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC7U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC6H_UF16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC6H_SF16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_4X4
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BGRA8
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16I
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16I
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGB32F
+    {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true},     // RGBA8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG8S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG32UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R32UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_8X8
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_8X5
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_5X4
+
+    // Compressed sRGB formats
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
+
+    // Depth formats
+    {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
+    {vk::Format::eD16Unorm, ComponentType::UNorm, true},  // Z16
+
+    // DepthStencil formats
+    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
+    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
+    {vk::Format::eUndefined, ComponentType::Invalid, false},   // Z32FS8
+}};
+
+static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
+    return pixel_format >= PixelFormat::MaxColorFormat &&
+           pixel_format < PixelFormat::MaxDepthStencilFormat;
+}
+
+std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
+                                          PixelFormat pixel_format, ComponentType component_type) {
+    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
+
+    const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
+    UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
+                         "Unimplemented texture format with pixel format={} and component type={}",
+                         static_cast<u32>(pixel_format), static_cast<u32>(component_type));
+    ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
+
+    auto usage = vk::FormatFeatureFlagBits::eSampledImage |
+                 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
+    if (tuple.attachable) {
+        usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
+                                            : vk::FormatFeatureFlagBits::eColorAttachment;
+    }
+    return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
+}
+
+vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
+    switch (stage) {
+    case Maxwell::ShaderStage::Vertex:
+        return vk::ShaderStageFlagBits::eVertex;
+    case Maxwell::ShaderStage::TesselationControl:
+        return vk::ShaderStageFlagBits::eTessellationControl;
+    case Maxwell::ShaderStage::TesselationEval:
+        return vk::ShaderStageFlagBits::eTessellationEvaluation;
+    case Maxwell::ShaderStage::Geometry:
+        return vk::ShaderStageFlagBits::eGeometry;
+    case Maxwell::ShaderStage::Fragment:
+        return vk::ShaderStageFlagBits::eFragment;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
+    return {};
+}
+
+vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
+    switch (topology) {
+    case Maxwell::PrimitiveTopology::Points:
+        return vk::PrimitiveTopology::ePointList;
+    case Maxwell::PrimitiveTopology::Lines:
+        return vk::PrimitiveTopology::eLineList;
+    case Maxwell::PrimitiveTopology::LineStrip:
+        return vk::PrimitiveTopology::eLineStrip;
+    case Maxwell::PrimitiveTopology::Triangles:
+        return vk::PrimitiveTopology::eTriangleList;
+    case Maxwell::PrimitiveTopology::TriangleStrip:
+        return vk::PrimitiveTopology::eTriangleStrip;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+    return {};
+}
+
+vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
+    switch (type) {
+    case Maxwell::VertexAttribute::Type::SignedNorm:
+        break;
+    case Maxwell::VertexAttribute::Type::UnsignedNorm:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Unorm;
+        default:
+            break;
+        }
+        break;
+    case Maxwell::VertexAttribute::Type::SignedInt:
+        break;
+    case Maxwell::VertexAttribute::Type::UnsignedInt:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Uint;
+        default:
+            break;
+        }
+    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+    case Maxwell::VertexAttribute::Type::SignedScaled:
+        break;
+    case Maxwell::VertexAttribute::Type::Float:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+            return vk::Format::eR32G32B32A32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32:
+            return vk::Format::eR32G32B32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32:
+            return vk::Format::eR32G32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Sfloat;
+        default:
+            break;
+        }
+        break;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
+                      static_cast<u32>(size));
+    return {};
+}
+
+vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
+    switch (comparison) {
+    case Maxwell::ComparisonOp::Never:
+    case Maxwell::ComparisonOp::NeverOld:
+        return vk::CompareOp::eNever;
+    case Maxwell::ComparisonOp::Less:
+    case Maxwell::ComparisonOp::LessOld:
+        return vk::CompareOp::eLess;
+    case Maxwell::ComparisonOp::Equal:
+    case Maxwell::ComparisonOp::EqualOld:
+        return vk::CompareOp::eEqual;
+    case Maxwell::ComparisonOp::LessEqual:
+    case Maxwell::ComparisonOp::LessEqualOld:
+        return vk::CompareOp::eLessOrEqual;
+    case Maxwell::ComparisonOp::Greater:
+    case Maxwell::ComparisonOp::GreaterOld:
+        return vk::CompareOp::eGreater;
+    case Maxwell::ComparisonOp::NotEqual:
+    case Maxwell::ComparisonOp::NotEqualOld:
+        return vk::CompareOp::eNotEqual;
+    case Maxwell::ComparisonOp::GreaterEqual:
+    case Maxwell::ComparisonOp::GreaterEqualOld:
+        return vk::CompareOp::eGreaterOrEqual;
+    case Maxwell::ComparisonOp::Always:
+    case Maxwell::ComparisonOp::AlwaysOld:
+        return vk::CompareOp::eAlways;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
+    return {};
+}
+
+vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
+    switch (index_format) {
+    case Maxwell::IndexFormat::UnsignedByte:
+        UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
+        return vk::IndexType::eUint16;
+    case Maxwell::IndexFormat::UnsignedShort:
+        return vk::IndexType::eUint16;
+    case Maxwell::IndexFormat::UnsignedInt:
+        return vk::IndexType::eUint32;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
+    return {};
+}
+
+vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
+    switch (stencil_op) {
+    case Maxwell::StencilOp::Keep:
+    case Maxwell::StencilOp::KeepOGL:
+        return vk::StencilOp::eKeep;
+    case Maxwell::StencilOp::Zero:
+    case Maxwell::StencilOp::ZeroOGL:
+        return vk::StencilOp::eZero;
+    case Maxwell::StencilOp::Replace:
+    case Maxwell::StencilOp::ReplaceOGL:
+        return vk::StencilOp::eReplace;
+    case Maxwell::StencilOp::Incr:
+    case Maxwell::StencilOp::IncrOGL:
+        return vk::StencilOp::eIncrementAndClamp;
+    case Maxwell::StencilOp::Decr:
+    case Maxwell::StencilOp::DecrOGL:
+        return vk::StencilOp::eDecrementAndClamp;
+    case Maxwell::StencilOp::Invert:
+    case Maxwell::StencilOp::InvertOGL:
+        return vk::StencilOp::eInvert;
+    case Maxwell::StencilOp::IncrWrap:
+    case Maxwell::StencilOp::IncrWrapOGL:
+        return vk::StencilOp::eIncrementAndWrap;
+    case Maxwell::StencilOp::DecrWrap:
+    case Maxwell::StencilOp::DecrWrapOGL:
+        return vk::StencilOp::eDecrementAndWrap;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
+    return {};
+}
+
+vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
+    switch (equation) {
+    case Maxwell::Blend::Equation::Add:
+    case Maxwell::Blend::Equation::AddGL:
+        return vk::BlendOp::eAdd;
+    case Maxwell::Blend::Equation::Subtract:
+    case Maxwell::Blend::Equation::SubtractGL:
+        return vk::BlendOp::eSubtract;
+    case Maxwell::Blend::Equation::ReverseSubtract:
+    case Maxwell::Blend::Equation::ReverseSubtractGL:
+        return vk::BlendOp::eReverseSubtract;
+    case Maxwell::Blend::Equation::Min:
+    case Maxwell::Blend::Equation::MinGL:
+        return vk::BlendOp::eMin;
+    case Maxwell::Blend::Equation::Max:
+    case Maxwell::Blend::Equation::MaxGL:
+        return vk::BlendOp::eMax;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
+    return {};
+}
+
+vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
+    switch (factor) {
+    case Maxwell::Blend::Factor::Zero:
+    case Maxwell::Blend::Factor::ZeroGL:
+        return vk::BlendFactor::eZero;
+    case Maxwell::Blend::Factor::One:
+    case Maxwell::Blend::Factor::OneGL:
+        return vk::BlendFactor::eOne;
+    case Maxwell::Blend::Factor::SourceColor:
+    case Maxwell::Blend::Factor::SourceColorGL:
+        return vk::BlendFactor::eSrcColor;
+    case Maxwell::Blend::Factor::OneMinusSourceColor:
+    case Maxwell::Blend::Factor::OneMinusSourceColorGL:
+        return vk::BlendFactor::eOneMinusSrcColor;
+    case Maxwell::Blend::Factor::SourceAlpha:
+    case Maxwell::Blend::Factor::SourceAlphaGL:
+        return vk::BlendFactor::eSrcAlpha;
+    case Maxwell::Blend::Factor::OneMinusSourceAlpha:
+    case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
+        return vk::BlendFactor::eOneMinusSrcAlpha;
+    case Maxwell::Blend::Factor::DestAlpha:
+    case Maxwell::Blend::Factor::DestAlphaGL:
+        return vk::BlendFactor::eDstAlpha;
+    case Maxwell::Blend::Factor::OneMinusDestAlpha:
+    case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
+        return vk::BlendFactor::eOneMinusDstAlpha;
+    case Maxwell::Blend::Factor::DestColor:
+    case Maxwell::Blend::Factor::DestColorGL:
+        return vk::BlendFactor::eDstColor;
+    case Maxwell::Blend::Factor::OneMinusDestColor:
+    case Maxwell::Blend::Factor::OneMinusDestColorGL:
+        return vk::BlendFactor::eOneMinusDstColor;
+    case Maxwell::Blend::Factor::SourceAlphaSaturate:
+    case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
+        return vk::BlendFactor::eSrcAlphaSaturate;
+    case Maxwell::Blend::Factor::Source1Color:
+    case Maxwell::Blend::Factor::Source1ColorGL:
+        return vk::BlendFactor::eSrc1Color;
+    case Maxwell::Blend::Factor::OneMinusSource1Color:
+    case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
+        return vk::BlendFactor::eOneMinusSrc1Color;
+    case Maxwell::Blend::Factor::Source1Alpha:
+    case Maxwell::Blend::Factor::Source1AlphaGL:
+        return vk::BlendFactor::eSrc1Alpha;
+    case Maxwell::Blend::Factor::OneMinusSource1Alpha:
+    case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
+        return vk::BlendFactor::eOneMinusSrc1Alpha;
+    case Maxwell::Blend::Factor::ConstantColor:
+    case Maxwell::Blend::Factor::ConstantColorGL:
+        return vk::BlendFactor::eConstantColor;
+    case Maxwell::Blend::Factor::OneMinusConstantColor:
+    case Maxwell::Blend::Factor::OneMinusConstantColorGL:
+        return vk::BlendFactor::eOneMinusConstantColor;
+    case Maxwell::Blend::Factor::ConstantAlpha:
+    case Maxwell::Blend::Factor::ConstantAlphaGL:
+        return vk::BlendFactor::eConstantAlpha;
+    case Maxwell::Blend::Factor::OneMinusConstantAlpha:
+    case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
+        return vk::BlendFactor::eOneMinusConstantAlpha;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
+    return {};
+}
+
+vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
+    switch (front_face) {
+    case Maxwell::Cull::FrontFace::ClockWise:
+        return vk::FrontFace::eClockwise;
+    case Maxwell::Cull::FrontFace::CounterClockWise:
+        return vk::FrontFace::eCounterClockwise;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
+    return {};
+}
+
+vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
+    switch (cull_face) {
+    case Maxwell::Cull::CullFace::Front:
+        return vk::CullModeFlagBits::eFront;
+    case Maxwell::Cull::CullFace::Back:
+        return vk::CullModeFlagBits::eBack;
+    case Maxwell::Cull::CullFace::FrontAndBack:
+        return vk::CullModeFlagBits::eFrontAndBack;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
+    return {};
+}
+
+vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
+    switch (swizzle) {
+    case Tegra::Texture::SwizzleSource::Zero:
+        return vk::ComponentSwizzle::eZero;
+    case Tegra::Texture::SwizzleSource::R:
+        return vk::ComponentSwizzle::eR;
+    case Tegra::Texture::SwizzleSource::G:
+        return vk::ComponentSwizzle::eG;
+    case Tegra::Texture::SwizzleSource::B:
+        return vk::ComponentSwizzle::eB;
+    case Tegra::Texture::SwizzleSource::A:
+        return vk::ComponentSwizzle::eA;
+    case Tegra::Texture::SwizzleSource::OneInt:
+    case Tegra::Texture::SwizzleSource::OneFloat:
+        return vk::ComponentSwizzle::eOne;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
+    return {};
+}
+
+} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
new file mode 100644
index 000000000..4cadc0721
--- /dev/null
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -0,0 +1,58 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <utility>
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/surface.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan::MaxwellToVK {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using PixelFormat = VideoCore::Surface::PixelFormat;
+using ComponentType = VideoCore::Surface::ComponentType;
+
+namespace Sampler {
+
+vk::Filter Filter(Tegra::Texture::TextureFilter filter);
+
+vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
+
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
+
+vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
+
+} // namespace Sampler
+
+std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
+                                          PixelFormat pixel_format, ComponentType component_type);
+
+vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
+
+vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
+
+vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
+
+vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
+
+vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
+
+vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);
+
+vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
+
+vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
+
+vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);
+
+vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);
+
+vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
+
+} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
new file mode 100644
index 000000000..4a33a6c84
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -0,0 +1,116 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <tuple>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "core/memory.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+                             VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                             VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
+    : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} {
+    const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
+                       vk::BufferUsageFlagBits::eIndexBuffer |
+                       vk::BufferUsageFlagBits::eUniformBuffer;
+    const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
+                        vk::AccessFlagBits::eUniformRead;
+    stream_buffer =
+        std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
+                                         vk::PipelineStageFlagBits::eAllCommands);
+    buffer_handle = stream_buffer->GetBuffer();
+}
+
+VKBufferCache::~VKBufferCache() = default;
+
+u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
+                                bool cache) {
+    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
+    ASSERT(cpu_addr);
+
+    // Cache management is a big overhead, so only cache entries with a given size.
+    // TODO: Figure out which size is the best for given games.
+    cache &= size >= 2048;
+
+    if (cache) {
+        if (auto entry = TryGet(*cpu_addr); entry) {
+            if (entry->size >= size && entry->alignment == alignment) {
+                return entry->offset;
+            }
+            Unregister(entry);
+        }
+    }
+
+    AlignBuffer(alignment);
+    const u64 uploaded_offset = buffer_offset;
+
+    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+
+    buffer_ptr += size;
+    buffer_offset += size;
+
+    if (cache) {
+        auto entry = std::make_shared<CachedBufferEntry>();
+        entry->offset = uploaded_offset;
+        entry->size = size;
+        entry->alignment = alignment;
+        entry->addr = *cpu_addr;
+        Register(entry);
+    }
+
+    return uploaded_offset;
+}
+
+u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
+    AlignBuffer(alignment);
+    std::memcpy(buffer_ptr, raw_pointer, size);
+    const u64 uploaded_offset = buffer_offset;
+
+    buffer_ptr += size;
+    buffer_offset += size;
+    return uploaded_offset;
+}
+
+std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
+    AlignBuffer(alignment);
+    u8* const uploaded_ptr = buffer_ptr;
+    const u64 uploaded_offset = buffer_offset;
+
+    buffer_ptr += size;
+    buffer_offset += size;
+    return {uploaded_ptr, uploaded_offset};
+}
+
+void VKBufferCache::Reserve(std::size_t max_size) {
+    bool invalidate;
+    std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
+    buffer_offset = buffer_offset_base;
+
+    if (invalidate) {
+        InvalidateAll();
+    }
+}
+
+VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) {
+    return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base);
+}
+
+void VKBufferCache::AlignBuffer(std::size_t alignment) {
+    // Align the offset, not the mapped pointer
+    const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
+    buffer_ptr += offset_aligned - buffer_offset;
+    buffer_offset = offset_aligned;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
new file mode 100644
index 000000000..d8e916f31
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -0,0 +1,87 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <tuple>
+
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKMemoryManager;
+class VKStreamBuffer;
+
+struct CachedBufferEntry final : public RasterizerCacheObject {
+    VAddr GetAddr() const override {
+        return addr;
+    }
+
+    std::size_t GetSizeInBytes() const override {
+        return size;
+    }
+
+    // We do not have to flush this cache as things in it are never modified by us.
+    void Flush() override {}
+
+    VAddr addr;
+    std::size_t size;
+    u64 offset;
+    std::size_t alignment;
+};
+
+class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+public:
+    explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
+                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                           VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
+    ~VKBufferCache();
+
+    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
+    /// allocated.
+    u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
+                     bool cache = true);
+
+    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
+    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
+
+    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
+    std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
+
+    /// Reserves a region of memory to be used in subsequent upload/reserve operations.
+    void Reserve(std::size_t max_size);
+
+    /// Ensures that the set data is sent to the device.
+    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx);
+
+    /// Returns the buffer cache handle.
+    vk::Buffer GetBuffer() const {
+        return buffer_handle;
+    }
+
+private:
+    void AlignBuffer(std::size_t alignment);
+
+    Tegra::MemoryManager& tegra_memory_manager;
+
+    std::unique_ptr<VKStreamBuffer> stream_buffer;
+    vk::Buffer buffer_handle;
+
+    u8* buffer_ptr = nullptr;
+    u64 buffer_offset = 0;
+    u64 buffer_offset_base = 0;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 78a4e5f0e..00242ecbe 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
                                  FormatType format_type) const {
     const auto it = format_properties.find(wanted_format);
     if (it == format_properties.end()) {
-        LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}",
-                     static_cast<u32>(wanted_format));
+        LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
         UNREACHABLE();
         return true;
     }
@@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
         format_properties.emplace(format, physical.getFormatProperties(format, dldi));
     };
     AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
-    AddFormatQuery(vk::Format::eR5G6B5UnormPack16);
+    AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
+    AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
+    AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
+    AddFormatQuery(vk::Format::eR8Unorm);
     AddFormatQuery(vk::Format::eD32Sfloat);
+    AddFormatQuery(vk::Format::eD16Unorm);
     AddFormatQuery(vk::Format::eD16UnormS8Uint);
     AddFormatQuery(vk::Format::eD24UnormS8Uint);
     AddFormatQuery(vk::Format::eD32SfloatS8Uint);
+    AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
+    AddFormatQuery(vk::Format::eBc2UnormBlock);
+    AddFormatQuery(vk::Format::eBc3UnormBlock);
+    AddFormatQuery(vk::Format::eBc4UnormBlock);
 
     return format_properties;
 }
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 17ee93b91..0451babbf 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
 
 VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
                                        u8* data, u64 begin, u64 end)
-    : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
+    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
 
 VKMemoryCommitImpl::~VKMemoryCommitImpl() {
     allocation->Free(this);
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index 1678463c7..a1e117443 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
     protected_resources.push_back(resource);
 }
 
-void VKFence::Unprotect(const VKResource* resource) {
+void VKFence::Unprotect(VKResource* resource) {
     const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
-    if (it != protected_resources.end()) {
-        protected_resources.erase(it);
-    }
+    ASSERT(it != protected_resources.end());
+
+    resource->OnFenceRemoval(this);
+    protected_resources.erase(it);
 }
 
 VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
 }
 
 void VKFenceWatch::Wait() {
-    if (!fence) {
+    if (fence == nullptr) {
         return;
     }
     fence->Wait();
     fence->Unprotect(this);
-    fence = nullptr;
 }
 
 void VKFenceWatch::Watch(VKFence& new_fence) {
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index 5018dfa44..5bfe4cead 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -63,7 +63,7 @@ public:
     void Protect(VKResource* resource);
 
     /// Removes protection for a resource.
-    void Unprotect(const VKResource* resource);
+    void Unprotect(VKResource* resource);
 
     /// Retreives the fence.
     operator vk::Fence() const {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+                                                                                   pipeline_stage} {
+    CreateBuffers(memory_manager, usage);
+    ReserveWatches(WATCHES_INITIAL_RESERVE);
+}
+
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+    ASSERT(size <= buffer_size);
+    mapped_size = size;
+
+    if (offset + size > buffer_size) {
+        // The buffer would overflow, save the amount of used buffers, signal an invalidation and
+        // reset the state.
+        invalidation_mark = used_watches;
+        used_watches = 0;
+        offset = 0;
+    }
+
+    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+}
+
+VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+    if (invalidation_mark) {
+        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+        exctx = scheduler.Flush();
+        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
+                      [&](auto& resource) { resource->Wait(); });
+        invalidation_mark = std::nullopt;
+    }
+
+    if (used_watches + 1 >= watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(WATCHES_RESERVE_CHUNK);
+    }
+    // Add a watch for this allocation.
+    watches[used_watches++]->Watch(exctx.GetFence());
+
+    offset += size;
+
+    return exctx;
+}
+
+void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
+                                         nullptr);
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    commit = memory_manager.Commit(*buffer, true);
+    mapped_pointer = commit->GetData();
+}
+
+void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+    const std::size_t previous_size = watches.size();
+    watches.resize(previous_size + grow_size);
+    std::generate(watches.begin() + previous_size, watches.end(),
+                  []() { return std::make_unique<VKFenceWatch>(); });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKFenceWatch;
+class VKResourceManager;
+class VKScheduler;
+
+class VKStreamBuffer {
+public:
+    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+    ~VKStreamBuffer();
+
+    /**
+     * Reserves a region of memory from the stream buffer.
+     * @param size Size to reserve.
+     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
+     * offset and a boolean that's true when buffer has been invalidated.
+     */
+    std::tuple<u8*, u64, bool> Reserve(u64 size);
+
+    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
+    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+
+    vk::Buffer GetBuffer() const {
+        return *buffer;
+    }
+
+private:
+    /// Creates Vulkan buffer handles committing the required the required memory.
+    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+
+    /// Increases the amount of watches available.
+    void ReserveWatches(std::size_t grow_size);
+
+    const VKDevice& device;                      ///< Vulkan device manager.
+    VKScheduler& scheduler;                      ///< Command scheduler.
+    const u64 buffer_size;                       ///< Total size of the stream buffer.
+    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
+    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
+
+    UniqueBuffer buffer;   ///< Mapped buffer.
+    VKMemoryCommit commit; ///< Memory commit.
+    u8* mapped_pointer{};  ///< Pointer to the host visible commit
+
+    u64 offset{};      ///< Buffer iterator.
+    u64 mapped_size{}; ///< Size reserved for the current copy.
+
+    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
+    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
+    std::optional<std::size_t>
+        invalidation_mark{}; ///< Number of watches used in the current invalidation.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 044ba116a..a7ac26d71 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
 
 PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
     switch (format) {
-        // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
-        // gamma.
     case Tegra::RenderTargetFormat::RGBA8_SRGB:
         return PixelFormat::RGBA8_SRGB;
     case Tegra::RenderTargetFormat::RGBA8_UNORM:
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
 
 #include "video_core/textures/astc.h"
 
-class BitStream {
+class InputBitStream {
 public:
-    explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+    explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
         : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
 
-    ~BitStream() = default;
-
-    int GetBitsWritten() const {
-        return m_BitsWritten;
-    }
-
-    void WriteBitsR(unsigned int val, unsigned int nBits) {
-        for (unsigned int i = 0; i < nBits; i++) {
-            WriteBit((val >> (nBits - i - 1)) & 1);
-        }
-    }
-
-    void WriteBits(unsigned int val, unsigned int nBits) {
-        for (unsigned int i = 0; i < nBits; i++) {
-            WriteBit((val >> i) & 1);
-        }
-    }
+    ~InputBitStream() = default;
 
     int GetBitsRead() const {
         return m_BitsRead;
@@ -71,6 +55,38 @@ public:
     }
 
 private:
+    const int m_NumBits;
+    const unsigned char* m_CurByte;
+    int m_NextBit = 0;
+    int m_BitsRead = 0;
+
+    bool done = false;
+};
+
+class OutputBitStream {
+public:
+    explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+
+    ~OutputBitStream() = default;
+
+    int GetBitsWritten() const {
+        return m_BitsWritten;
+    }
+
+    void WriteBitsR(unsigned int val, unsigned int nBits) {
+        for (unsigned int i = 0; i < nBits; i++) {
+            WriteBit((val >> (nBits - i - 1)) & 1);
+        }
+    }
+
+    void WriteBits(unsigned int val, unsigned int nBits) {
+        for (unsigned int i = 0; i < nBits; i++) {
+            WriteBit((val >> i) & 1);
+        }
+    }
+
+private:
     void WriteBit(int b) {
 
         if (done)
@@ -238,8 +254,8 @@ public:
     // Fills result with the values that are encoded in the given
     // bitstream. We must know beforehand what the maximum possible
     // value is, and how many values we're decoding.
-    static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
-                                      uint32_t maxRange, uint32_t nValues) {
+    static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
+                                      InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
         // Determine encoding parameters
         IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
 
@@ -267,7 +283,7 @@ public:
     }
 
 private:
-    static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+    static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
                                 uint32_t nBitsPerValue) {
         // Implement the algorithm in section C.2.12
         uint32_t m[5];
@@ -327,7 +343,7 @@ private:
         }
     }
 
-    static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+    static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
                                  uint32_t nBitsPerValue) {
         // Implement the algorithm in section C.2.12
         uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
     }
 };
 
-static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
+static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
     TexelWeightParams params;
 
     // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
     return params;
 }
 
-static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
+static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
                               uint32_t blockHeight) {
     // Don't actually care about the void extent, just read the bits...
     for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
 
     // We now have enough to decode our integer sequence.
     std::vector<IntegerEncodedValue> decodedColorValues;
-    BitStream colorStream(data);
+    InputBitStream colorStream(data);
     IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
 
     // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
 #undef READ_INT_VALUES
 }
 
-static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
+static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
                             const uint32_t blockHeight, uint32_t* outBuf) {
-    BitStream strm(inBuf);
+    InputBitStream strm(inBuf);
     TexelWeightParams weightParams = DecodeBlockInfo(strm);
 
     // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
     // Define color data.
     uint8_t colorEndpointData[16];
     memset(colorEndpointData, 0, sizeof(colorEndpointData));
-    BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
+    OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
 
     // Read extra config data...
     uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
     memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
 
     std::vector<IntegerEncodedValue> texelWeightValues;
-    BitStream weightStream(texelWeightData);
+    InputBitStream weightStream(texelWeightData);
 
     IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
                                                weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
 
 namespace Tegra::Texture::ASTC {
 
-std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                 uint32_t depth, uint32_t block_width, uint32_t block_height) {
     uint32_t blockIdx = 0;
     std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
         for (uint32_t j = 0; j < height; j += block_height) {
             for (uint32_t i = 0; i < width; i += block_width) {
 
-                uint8_t* blockPtr = data.data() + blockIdx * 16;
+                const uint8_t* blockPtr = data + blockIdx * 16;
 
                 // Blocks can be at most 12x12
                 uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
 
 namespace Tegra::Texture::ASTC {
 
-std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                 uint32_t depth, uint32_t block_width, uint32_t block_height);
 
 } // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <tuple>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/textures/astc.h"
+#include "video_core/textures/convert.h"
+
+namespace Tegra::Texture {
+
+using VideoCore::Surface::PixelFormat;
+
+template <bool reverse>
+void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
+    union S8Z24 {
+        BitField<0, 24, u32> z24;
+        BitField<24, 8, u32> s8;
+    };
+    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
+
+    union Z24S8 {
+        BitField<0, 8, u32> s8;
+        BitField<8, 24, u32> z24;
+    };
+    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
+
+    S8Z24 s8z24_pixel{};
+    Z24S8 z24s8_pixel{};
+    constexpr auto bpp{
+        VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
+    for (std::size_t y = 0; y < height; ++y) {
+        for (std::size_t x = 0; x < width; ++x) {
+            const std::size_t offset{bpp * (y * width + x)};
+            if constexpr (reverse) {
+                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
+                s8z24_pixel.s8.Assign(z24s8_pixel.s8);
+                s8z24_pixel.z24.Assign(z24s8_pixel.z24);
+                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
+            } else {
+                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
+                z24s8_pixel.s8.Assign(s8z24_pixel.s8);
+                z24s8_pixel.z24.Assign(s8z24_pixel.z24);
+                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
+            }
+        }
+    }
+}
+
+static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
+    SwapS8Z24ToZ24S8<false>(data, width, height);
+}
+
+static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
+    SwapS8Z24ToZ24S8<true>(data, width, height);
+}
+
+void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
+                            bool convert_astc, bool convert_s8z24) {
+    if (convert_astc && IsPixelFormatASTC(pixel_format)) {
+        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
+        u32 block_width{};
+        u32 block_height{};
+        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
+        const std::vector<u8> rgba8_data =
+            Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
+        std::copy(rgba8_data.begin(), rgba8_data.end(), data);
+
+    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+        Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
+    }
+}
+
+void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
+                            bool convert_astc, bool convert_s8z24) {
+    if (convert_astc && IsPixelFormatASTC(pixel_format)) {
+        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
+                     static_cast<u32>(pixel_format));
+        UNREACHABLE();
+
+    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+        Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
+    }
+}
+
+} // namespace Tegra::Texture
+\ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/surface.h"
+
+namespace Tegra::Texture {
+
+void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
+                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
+
+void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
+                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
+
+} // namespace Tegra::Texture
+\ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..cad7340f5 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
                 const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
                 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
                 const u32 pixel_index{out_x + pixel_base};
-                data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
-                data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
+                data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
+                data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
                 std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
             }
             pixel_base += stride_x;
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
             for (u32 xb = 0; xb < blocks_on_x; xb++) {
                 const u32 x_start = xb * block_x_elements;
                 const u32 x_end = std::min(width, x_start + block_x_elements);
-                if (fast) {
+                if constexpr (fast) {
                     FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
                                      z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
                                      layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..65df86890 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
     return 512;
 }
 
-/**
- * Unswizzles a swizzled texture without changing its format.
- */
+/// Unswizzles a swizzled texture without changing its format.
 void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
                       u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                       u32 block_height = TICEntry::DefaultBlockHeight,
                       u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
-/**
- * Unswizzles a swizzled texture without changing its format.
- */
+
+/// Unswizzles a swizzled texture without changing its format.
 std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
                                  u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                                  u32 block_height = TICEntry::DefaultBlockHeight,
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
                       u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
                       bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
 
-/**
- * Decodes an unswizzled texture into a A8R8G8B8 texture.
- */
+/// Decodes an unswizzled texture into a A8R8G8B8 texture.
 std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
                               u32 height);
 
-/**
- * This function calculates the correct size of a texture depending if it's tiled or not.
- */
+/// This function calculates the correct size of a texture depending if it's tiled or not.
 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                           u32 block_height, u32 block_depth);
 
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
                     u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
                     u32 block_height);
+
 /// Copies a tiled subrectangle into a linear surface.
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
                       u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,