35 files changed, 1087 insertions, 151 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index ccfed4f2e..db9332d00 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,6 +29,8 @@ add_library(video_core STATIC
     gpu_synch.h
     gpu_thread.cpp
     gpu_thread.h
+    guest_driver.cpp
+    guest_driver.h
     macro_interpreter.cpp
     macro_interpreter.h
     memory_manager.cpp
@@ -154,6 +156,7 @@ if (ENABLE_VULKAN)
         renderer_vulkan/maxwell_to_vk.cpp
         renderer_vulkan/maxwell_to_vk.h
         renderer_vulkan/renderer_vulkan.h
+        renderer_vulkan/renderer_vulkan.cpp
         renderer_vulkan/vk_blit_screen.cpp
         renderer_vulkan/vk_blit_screen.h
         renderer_vulkan/vk_buffer_cache.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 0510ed777..186aca61d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -101,7 +101,10 @@ public:
     void TickFrame() {
         ++epoch;
         while (!pending_destruction.empty()) {
-            if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
+            // Delay at least 4 frames before destruction.
+            // This is due to triple buffering happening on some drivers.
+            static constexpr u64 epochs_to_destroy = 5;
+            if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
                 break;
             }
             pending_destruction.pop_front();
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index 44b8b8d22..d56a47710 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
 #include "video_core/textures/texture.h"
 
 namespace Tegra::Engines {
@@ -106,6 +107,9 @@ public:
     virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                     u64 offset) const = 0;
     virtual u32 GetBoundBuffer() const = 0;
+
+    virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
+    virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
 };
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 110406f2f..4b824aa4e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
     return result;
 }
 
+VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
+    return rasterizer.AccessGuestDriverProfile();
+}
+
+const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
+    return rasterizer.AccessGuestDriverProfile();
+}
+
 void KeplerCompute::ProcessLaunch() {
     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 4ef3e0613..eeb79c56f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -218,6 +218,10 @@ public:
         return regs.tex_cb_index;
     }
 
+    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
+
+    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
+
 private:
     Core::System& system;
     VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 58dfa8033..7cea146f0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
     return result;
 }
 
+VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
+    return rasterizer.AccessGuestDriverProfile();
+}
+
+const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
+    return rasterizer.AccessGuestDriverProfile();
+}
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ee79260fc..8808bbf76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1306,6 +1306,10 @@ public:
         return regs.tex_cb_index;
     }
 
+    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
+
+    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
+
     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
     /// we've seen used.
     using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 6376b579b..cbb201114 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
     Exch = 8,
 };
 
+enum class GlobalAtomicOp : u64 {
+    Add = 0,
+    Min = 1,
+    Max = 2,
+    Inc = 3,
+    Dec = 4,
+    And = 5,
+    Or = 6,
+    Xor = 7,
+    Exch = 8,
+    SafeAdd = 10,
+};
+
+enum class GlobalAtomicType : u64 {
+    U32 = 0,
+    S32 = 1,
+    U64 = 2,
+    F32_FTZ_RN = 3,
+    F16x2_FTZ_RN = 4,
+    S64 = 5,
+};
+
 enum class UniformType : u64 {
     UnsignedByte = 0,
     SignedByte = 1,
@@ -958,6 +980,12 @@ union Instruction {
     } stg;
 
     union {
+        BitField<52, 4, GlobalAtomicOp> operation;
+        BitField<49, 3, GlobalAtomicType> type;
+        BitField<28, 20, s64> offset;
+    } atom;
+
+    union {
         BitField<52, 4, AtomicOp> operation;
         BitField<28, 2, AtomicType> type;
         BitField<30, 22, s64> offset;
@@ -1695,6 +1723,7 @@ public:
         ST_S,
         ST,    // Store in generic memory
         STG,   // Store in global memory
+        ATOM,  // Atomic operation on global memory
         ATOMS, // Atomic operation on shared memory
         AL2P,  // Transforms attribute memory into physical memory
         TEX,
@@ -2000,6 +2029,7 @@ private:
             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
             INST("101-------------", Id::ST, Type::Memory, "ST"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+            INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
             INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
             INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
             INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
new file mode 100644
index 000000000..6adef459e
--- /dev/null
+++ b/src/video_core/guest_driver.cpp
@@ -0,0 +1,36 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <limits>
+
+#include "video_core/guest_driver.h"
+
+namespace VideoCore {
+
+void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
+    if (texture_handler_size_deduced) {
+        return;
+    }
+    const std::size_t size = bound_offsets.size();
+    if (size < 2) {
+        return;
+    }
+    std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
+    u32 min_val = std::numeric_limits<u32>::max();
+    for (std::size_t i = 1; i < size; ++i) {
+        if (bound_offsets[i] == bound_offsets[i - 1]) {
+            continue;
+        }
+        const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
+        min_val = std::min(min_val, new_min);
+    }
+    if (min_val > 2) {
+        return;
+    }
+    texture_handler_size_deduced = true;
+    texture_handler_size = min_texture_handler_size * min_val;
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
new file mode 100644
index 000000000..fc1917347
--- /dev/null
+++ b/src/video_core/guest_driver.h
@@ -0,0 +1,41 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace VideoCore {
+
+/**
+ * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
+ * information necessary for impossible to avoid HLE methods like shader tracks as they are
+ * Entscheidungsproblems.
+ */
+class GuestDriverProfile {
+public:
+    void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
+
+    u32 GetTextureHandlerSize() const {
+        return texture_handler_size;
+    }
+
+    bool TextureHandlerSizeKnown() const {
+        return texture_handler_size_deduced;
+    }
+
+private:
+    // Minimum size of texture handler any driver can use.
+    static constexpr u32 min_texture_handler_size = 4;
+    // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
+    // use 4 bytes instead. Thus, certain drivers may squish the size.
+    static constexpr u32 default_texture_handler_size = 8;
+
+    u32 texture_handler_size = default_texture_handler_size;
+    bool texture_handler_size_deduced = false;
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5b0eca9e2..c586cd6fe 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
+#include "video_core/guest_driver.h"
 
 namespace Tegra {
 class MemoryManager;
@@ -78,5 +79,18 @@ public:
     /// Initialize disk cached resources for the game being emulated
     virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
                                    const DiskResourceLoadCallback& callback = {}) {}
+
+    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
+    GuestDriverProfile& AccessGuestDriverProfile() {
+        return guest_driver_profile;
+    }
+
+    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
+    const GuestDriverProfile& AccessGuestDriverProfile() const {
+        return guest_driver_profile;
+    }
+
+private:
+    GuestDriverProfile guest_driver_profile{};
 };
 } // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c428f06e4..362942e09 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -55,16 +55,20 @@ namespace {
 
 template <typename Engine, typename Entry>
 Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
-                                               Tegra::Engines::ShaderType shader_type) {
+                                               Tegra::Engines::ShaderType shader_type,
+                                               std::size_t index = 0) {
     if (entry.IsBindless()) {
         const Tegra::Texture::TextureHandle tex_handle =
             engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
         return engine.GetTextureInfo(tex_handle);
     }
+    const auto& gpu_profile = engine.AccessGuestDriverProfile();
+    const u32 offset =
+        entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
     if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
-        return engine.GetStageTexture(shader_type, entry.GetOffset());
+        return engine.GetStageTexture(shader_type, offset);
     } else {
-        return engine.GetTexture(entry.GetOffset());
+        return engine.GetTexture(offset);
     }
 }
 
@@ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
     u32 binding = device.GetBaseBindings(stage_index).sampler;
     for (const auto& entry : shader->GetShaderEntries().samplers) {
         const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
-        const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
-        SetupTexture(binding++, texture, entry);
+        if (!entry.IsIndexed()) {
+            const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
+            SetupTexture(binding++, texture, entry);
+        } else {
+            for (std::size_t i = 0; i < entry.Size(); ++i) {
+                const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
+                SetupTexture(binding++, texture, entry);
+            }
+        }
     }
 }
 
@@ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
     const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
     for (const auto& entry : kernel->GetShaderEntries().samplers) {
-        const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
-        SetupTexture(binding++, texture, entry);
+        if (!entry.IsIndexed()) {
+            const auto texture =
+                GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
+            SetupTexture(binding++, texture, entry);
+        } else {
+            for (std::size_t i = 0; i < entry.Size(); ++i) {
+                const auto texture =
+                    GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
+                SetupTexture(binding++, texture, entry);
+            }
+        }
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3c5bdd377..489eb143c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s
 }
 
 void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
+    locker.SetBoundBuffer(usage.bound_buffer);
     for (const auto& key : usage.keys) {
         const auto [buffer, offset] = key.first;
         locker.InsertKey(buffer, offset, key.second);
@@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() {
 
 ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
                                             const ConstBufferLocker& locker) const {
-    return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(),
+    return ShaderDiskCacheUsage{unique_identifier,         variant,
+                                locker.GetBoundBuffer(),   locker.GetKeys(),
                                 locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2996aaf08..4735000b5 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -391,6 +391,7 @@ public:
         DeclareVertex();
         DeclareGeometry();
         DeclareRegisters();
+        DeclareCustomVariables();
         DeclarePredicates();
         DeclareLocalMemory();
         DeclareInternalFlags();
@@ -503,6 +504,16 @@ private:
         }
     }
 
+    void DeclareCustomVariables() {
+        const u32 num_custom_variables = ir.GetNumCustomVariables();
+        for (u32 i = 0; i < num_custom_variables; ++i) {
+            code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
+        }
+        if (num_custom_variables > 0) {
+            code.AddNewLine();
+        }
+    }
+
     void DeclarePredicates() {
         const auto& predicates = ir.GetPredicates();
         for (const auto pred : predicates) {
@@ -655,7 +666,8 @@ private:
         u32 binding = device.GetBaseBindings(stage).sampler;
         for (const auto& sampler : ir.GetSamplers()) {
             const std::string name = GetSampler(sampler);
-            const std::string description = fmt::format("layout (binding = {}) uniform", binding++);
+            const std::string description = fmt::format("layout (binding = {}) uniform", binding);
+            binding += sampler.IsIndexed() ? sampler.Size() : 1;
 
             std::string sampler_type = [&]() {
                 if (sampler.IsBuffer()) {
@@ -682,7 +694,11 @@ private:
                 sampler_type += "Shadow";
             }
 
-            code.AddLine("{} {} {};", description, sampler_type, name);
+            if (!sampler.IsIndexed()) {
+                code.AddLine("{} {} {};", description, sampler_type, name);
+            } else {
+                code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size());
+            }
         }
         if (!ir.GetSamplers().empty()) {
             code.AddNewLine();
@@ -775,6 +791,11 @@ private:
             return {GetRegister(index), Type::Float};
         }
 
+        if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+            const u32 index = cv->GetIndex();
+            return {GetCustomVariable(index), Type::Float};
+        }
+
         if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
             const u32 value = immediate->GetValue();
             if (value < 10) {
@@ -1019,7 +1040,6 @@ private:
                 }
                 return {{"gl_ViewportIndex", Type::Int}};
             case 3:
-                UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
                 return {{"gl_PointSize", Type::Float}};
             }
             return {};
@@ -1099,7 +1119,11 @@ private:
         } else if (!meta->ptp.empty()) {
             expr += "Offsets";
         }
-        expr += '(' + GetSampler(meta->sampler) + ", ";
+        if (!meta->sampler.IsIndexed()) {
+            expr += '(' + GetSampler(meta->sampler) + ", ";
+        } else {
+            expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
+        }
         expr += coord_constructors.at(count + (has_array ? 1 : 0) +
                                       (has_shadow && !separate_dc ? 1 : 0) - 1);
         expr += '(';
@@ -1311,6 +1335,8 @@ private:
             const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
             target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
                       Type::Uint};
+        } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
+            target = {GetCustomVariable(cv->GetIndex()), Type::Float};
         } else {
             UNREACHABLE_MSG("Assign called without a proper target");
         }
@@ -1858,10 +1884,7 @@ private:
 
     template <const std::string_view& opname, Type type>
     Expression Atomic(Operation operation) {
-        ASSERT(stage == ShaderType::Compute);
-        auto& smem = std::get<SmemNode>(*operation[0]);
-
-        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+        return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
                             Visit(operation[1]).As(type)),
                 type};
     }
@@ -2241,6 +2264,10 @@ private:
         return GetDeclarationWithSuffix(index, "gpr");
     }
 
+    std::string GetCustomVariable(u32 index) const {
+        return GetDeclarationWithSuffix(index, "custom_var");
+    }
+
     std::string GetPredicate(Tegra::Shader::Pred pred) const {
         return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index cf874a09a..1fc204f6f 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -53,7 +53,7 @@ struct BindlessSamplerKey {
     Tegra::Engines::SamplerDescriptor sampler{};
 };
 
-constexpr u32 NativeVersion = 11;
+constexpr u32 NativeVersion = 12;
 
 // Making sure sizes doesn't change by accident
 static_assert(sizeof(ProgramVariant) == 20);
@@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
             u32 num_bound_samplers{};
             u32 num_bindless_samplers{};
             if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
-                file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
+                file.ReadArray(&usage.variant, 1) != 1 ||
+                file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
                 file.ReadArray(&num_bound_samplers, 1) != 1 ||
                 file.ReadArray(&num_bindless_samplers, 1) != 1) {
                 LOG_ERROR(Render_OpenGL, error_loading);
@@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
         u32 num_bindless_samplers{};
         ShaderDiskCacheUsage usage;
         if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
-            !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
+            !LoadObjectFromPrecompiled(usage.variant) ||
+            !LoadObjectFromPrecompiled(usage.bound_buffer) ||
+            !LoadObjectFromPrecompiled(num_keys) ||
             !LoadObjectFromPrecompiled(num_bound_samplers) ||
             !LoadObjectFromPrecompiled(num_bindless_samplers)) {
             return {};
@@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
 
     if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
         file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
+        file.WriteObject(usage.bound_buffer) != 1 ||
         file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
         file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
         file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
@@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
     };
 
     if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
-        !SaveObjectToPrecompiled(usage.variant) ||
+        !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
         !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
         !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
         !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 69a2fbdda..ef2371f6d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>);
 struct ShaderDiskCacheUsage {
     u64 unique_identifier{};
     ProgramVariant variant;
+    u32 bound_buffer{};
     VideoCommon::Shader::KeyMap keys;
     VideoCommon::Shader::BoundSamplerMap bound_samplers;
     VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index e95eb069e..d4b81cd87 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -176,6 +176,19 @@ GLint GetSwizzleSource(SwizzleSource source) {
     return GL_NONE;
 }
 
+GLenum GetComponent(PixelFormat format, bool is_first) {
+    switch (format) {
+    case PixelFormat::Z24S8:
+    case PixelFormat::Z32FS8:
+        return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+    case PixelFormat::S8Z24:
+        return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+    default:
+        UNREACHABLE();
+        return GL_DEPTH_COMPONENT;
+    }
+}
+
 void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
     if (params.IsBuffer()) {
         return;
@@ -184,7 +197,7 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
     glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
     glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
     glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1);
+    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1));
     if (params.num_levels == 1) {
         glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
     }
@@ -416,11 +429,21 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou
     if (new_swizzle == swizzle)
         return;
     swizzle = new_swizzle;
-    const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
-                                             GetSwizzleSource(z_source),
-                                             GetSwizzleSource(w_source)};
+    const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
+                                   GetSwizzleSource(z_source), GetSwizzleSource(w_source)};
     const GLuint handle = GetTexture();
-    glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+    const PixelFormat format = surface.GetSurfaceParams().pixel_format;
+    switch (format) {
+    case PixelFormat::Z24S8:
+    case PixelFormat::Z32FS8:
+    case PixelFormat::S8Z24:
+        glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
+                            GetComponent(format, x_source == SwizzleSource::R));
+        break;
+    default:
+        glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+        break;
+    }
 }
 
 OGLTextureView CachedSurfaceView::CreateTextureView() const {
@@ -529,8 +552,11 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
     const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
     const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
 
-    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
-                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
+    glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
+                      static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
+                      static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
+                      static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
+                      buffers,
                       is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
 }
 
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
new file mode 100644
index 000000000..d5032b432
--- /dev/null
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -0,0 +1,265 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/telemetry.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/frontend/emu_window.h"
+#include "core/memory.h"
+#include "core/perf_stats.h"
+#include "core/settings.h"
+#include "core/telemetry_session.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+
+namespace Vulkan {
+
+namespace {
+
+VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
+                       VkDebugUtilsMessageTypeFlagsEXT type,
+                       const VkDebugUtilsMessengerCallbackDataEXT* data,
+                       [[maybe_unused]] void* user_data) {
+    const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_};
+    const char* message{data->pMessage};
+
+    if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
+        LOG_CRITICAL(Render_Vulkan, "{}", message);
+    } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) {
+        LOG_WARNING(Render_Vulkan, "{}", message);
+    } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) {
+        LOG_INFO(Render_Vulkan, "{}", message);
+    } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) {
+        LOG_DEBUG(Render_Vulkan, "{}", message);
+    }
+    return VK_FALSE;
+}
+
+std::string GetReadableVersion(u32 version) {
+    return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
+                       VK_VERSION_PATCH(version));
+}
+
+std::string GetDriverVersion(const VKDevice& device) {
+    // Extracted from
+    // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
+    const u32 version = device.GetDriverVersion();
+
+    if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
+        const u32 major = (version >> 22) & 0x3ff;
+        const u32 minor = (version >> 14) & 0x0ff;
+        const u32 secondary = (version >> 6) & 0x0ff;
+        const u32 tertiary = version & 0x003f;
+        return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
+    }
+    if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) {
+        const u32 major = version >> 14;
+        const u32 minor = version & 0x3fff;
+        return fmt::format("{}.{}", major, minor);
+    }
+
+    return GetReadableVersion(version);
+}
+
+std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
+    std::sort(std::begin(available_extensions), std::end(available_extensions));
+
+    static constexpr std::size_t AverageExtensionSize = 64;
+    std::string separated_extensions;
+    separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
+
+    const auto end = std::end(available_extensions);
+    for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
+        if (const bool is_last = extension + 1 == end; is_last) {
+            separated_extensions += *extension;
+        } else {
+            separated_extensions += fmt::format("{},", *extension);
+        }
+    }
+    return separated_extensions;
+}
+
+} // Anonymous namespace
+
+RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
+    : RendererBase(window), system{system} {}
+
+RendererVulkan::~RendererVulkan() {
+    ShutDown();
+}
+
+void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+    const auto& layout = render_window.GetFramebufferLayout();
+    if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
+        const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
+        const bool use_accelerated =
+            rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
+        const bool is_srgb = use_accelerated && screen_info.is_srgb;
+        if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
+            swapchain->Create(layout.width, layout.height, is_srgb);
+            blit_screen->Recreate();
+        }
+
+        scheduler->WaitWorker();
+
+        swapchain->AcquireNextImage();
+        const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated);
+
+        scheduler->Flush(false, render_semaphore);
+
+        if (swapchain->Present(render_semaphore, fence)) {
+            blit_screen->Recreate();
+        }
+
+        render_window.SwapBuffers();
+        rasterizer->TickFrame();
+    }
+
+    render_window.PollEvents();
+}
+
+bool RendererVulkan::Init() {
+    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
+    render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
+    const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr);
+
+    std::optional<vk::DebugUtilsMessengerEXT> callback;
+    if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
+        callback = CreateDebugCallback(dldi);
+        if (!callback) {
+            return false;
+        }
+    }
+
+    if (!PickDevices(dldi)) {
+        if (callback) {
+            instance.destroy(*callback, nullptr, dldi);
+        }
+        return false;
+    }
+    debug_callback = UniqueDebugUtilsMessengerEXT(
+        *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
+                       instance, nullptr, device->GetDispatchLoader()));
+
+    Report();
+
+    memory_manager = std::make_unique<VKMemoryManager>(*device);
+
+    resource_manager = std::make_unique<VKResourceManager>(*device);
+
+    const auto& framebuffer = render_window.GetFramebufferLayout();
+    swapchain = std::make_unique<VKSwapchain>(surface, *device);
+    swapchain->Create(framebuffer.width, framebuffer.height, false);
+
+    scheduler = std::make_unique<VKScheduler>(*device, *resource_manager);
+
+    rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
+                                                    *resource_manager, *memory_manager, *scheduler);
+
+    blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
+                                                 *resource_manager, *memory_manager, *swapchain,
+                                                 *scheduler, screen_info);
+
+    return true;
+}
+
+void RendererVulkan::ShutDown() {
+    if (!device) {
+        return;
+    }
+    const auto dev = device->GetLogical();
+    const auto& dld = device->GetDispatchLoader();
+    if (dev && dld.vkDeviceWaitIdle) {
+        dev.waitIdle(dld);
+    }
+
+    rasterizer.reset();
+    blit_screen.reset();
+    scheduler.reset();
+    swapchain.reset();
+    memory_manager.reset();
+    resource_manager.reset();
+    device.reset();
+}
+
+std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
+    const vk::DispatchLoaderDynamic& dldi) {
+    const vk::DebugUtilsMessengerCreateInfoEXT callback_ci(
+        {},
+        vk::DebugUtilsMessageSeverityFlagBitsEXT::eError |
+            vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning |
+            vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo |
+            vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose,
+        vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral |
+            vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
+            vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
+        &DebugCallback, nullptr);
+    vk::DebugUtilsMessengerEXT callback;
+    if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) !=
+        vk::Result::eSuccess) {
+        LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
+        return {};
+    }
+    return callback;
+}
+
+bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) {
+    const auto devices = instance.enumeratePhysicalDevices(dldi);
+
+    // TODO(Rodrigo): Choose device from config file
+    const s32 device_index = Settings::values.vulkan_device;
+    if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
+        LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
+        return false;
+    }
+    const vk::PhysicalDevice physical_device = devices[device_index];
+
+    if (!VKDevice::IsSuitable(dldi, physical_device, surface)) {
+        return false;
+    }
+
+    device = std::make_unique<VKDevice>(dldi, physical_device, surface);
+    return device->Create(dldi, instance);
+}
+
+void RendererVulkan::Report() const {
+    const std::string vendor_name{device->GetVendorName()};
+    const std::string model_name{device->GetModelName()};
+    const std::string driver_version = GetDriverVersion(*device);
+    const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
+
+    const std::string api_version = GetReadableVersion(device->GetApiVersion());
+
+    const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
+
+    LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
+    LOG_INFO(Render_Vulkan, "Device: {}", model_name);
+    LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
+
+    auto& telemetry_session = system.TelemetrySession();
+    constexpr auto field = Telemetry::FieldType::UserSystem;
+    telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
+    telemetry_session.AddField(field, "GPU_Model", model_name);
+    telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
+    telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version);
+    telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
+}
+
+} // namespace Vulkan
+\ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 939eebe83..9840f26e5 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -400,8 +400,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
              VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
         Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
              false);
-        Test(extension, nv_device_diagnostic_checkpoints,
-             VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
+        if (Settings::values.renderer_debug) {
+            Test(extension, nv_device_diagnostic_checkpoints,
+                 VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
+        }
     }
 
     if (khr_shader_float16_int8) {
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b53078721..24a658dce 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -353,6 +353,7 @@ private:
         DeclareFragment();
         DeclareCompute();
         DeclareRegisters();
+        DeclareCustomVariables();
         DeclarePredicates();
         DeclareLocalMemory();
         DeclareSharedMemory();
@@ -586,6 +587,15 @@ private:
         }
     }
 
+    void DeclareCustomVariables() {
+        const u32 num_custom_variables = ir.GetNumCustomVariables();
+        for (u32 i = 0; i < num_custom_variables; ++i) {
+            const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
+            Name(id, fmt::format("custom_var_{}", i));
+            custom_variables.emplace(i, AddGlobalVariable(id));
+        }
+    }
+
     void DeclarePredicates() {
         for (const auto pred : ir.GetPredicates()) {
             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
@@ -982,6 +992,11 @@ private:
             return {OpLoad(t_float, registers.at(index)), Type::Float};
         }
 
+        if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+            const u32 index = cv->GetIndex();
+            return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
+        }
+
         if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
             return {Constant(t_uint, immediate->GetValue()), Type::Uint};
         }
@@ -1123,15 +1138,7 @@ private:
         }
 
         if (const auto gmem = std::get_if<GmemNode>(&*node)) {
-            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
-            const Id real = AsUint(Visit(gmem->GetRealAddress()));
-            const Id base = AsUint(Visit(gmem->GetBaseAddress()));
-
-            Id offset = OpISub(t_uint, real, base);
-            offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
-            return {OpLoad(t_float,
-                           OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
-                    Type::Float};
+            return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
         }
 
         if (const auto lmem = std::get_if<LmemNode>(&*node)) {
@@ -1142,10 +1149,7 @@ private:
         }
 
         if (const auto smem = std::get_if<SmemNode>(&*node)) {
-            Id address = AsUint(Visit(smem->GetAddress()));
-            address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
-            const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
-            return {OpLoad(t_uint, pointer), Type::Uint};
+            return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
         }
 
         if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -1339,20 +1343,13 @@ private:
             target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
 
         } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
-            ASSERT(stage == ShaderType::Compute);
-            Id address = AsUint(Visit(smem->GetAddress()));
-            address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
-            target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
+            target = {GetSharedMemoryPointer(*smem), Type::Uint};
 
         } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
-            const Id real = AsUint(Visit(gmem->GetRealAddress()));
-            const Id base = AsUint(Visit(gmem->GetBaseAddress()));
-            const Id diff = OpISub(t_uint, real, base);
-            const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
+            target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
 
-            const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
-            target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
-                      Type::Float};
+        } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
+            target = {custom_variables.at(cv->GetIndex()), Type::Float};
 
         } else {
             UNIMPLEMENTED();
@@ -1804,11 +1801,16 @@ private:
         return {};
     }
 
-    Expression UAtomicAdd(Operation operation) {
-        const auto& smem = std::get<SmemNode>(*operation[0]);
-        Id address = AsUint(Visit(smem.GetAddress()));
-        address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
-        const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
+    Expression AtomicAdd(Operation operation) {
+        Id pointer;
+        if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
+            pointer = GetSharedMemoryPointer(*smem);
+        } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
+            pointer = GetGlobalMemoryPointer(*gmem);
+        } else {
+            UNREACHABLE();
+            return {Constant(t_uint, 0), Type::Uint};
+        }
 
         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
         const Id semantics = Constant(t_uint, 0U);
@@ -2243,6 +2245,22 @@ private:
         return {};
     }
 
+    Id GetGlobalMemoryPointer(const GmemNode& gmem) {
+        const Id real = AsUint(Visit(gmem.GetRealAddress()));
+        const Id base = AsUint(Visit(gmem.GetBaseAddress()));
+        const Id diff = OpISub(t_uint, real, base);
+        const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
+        const Id buffer = global_buffers.at(gmem.GetDescriptor());
+        return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
+    }
+
+    Id GetSharedMemoryPointer(const SmemNode& smem) {
+        ASSERT(stage == ShaderType::Compute);
+        Id address = AsUint(Visit(smem.GetAddress()));
+        address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
+        return OpAccessChain(t_smem_uint, shared_memory, address);
+    }
+
     static constexpr std::array operation_decompilers = {
         &SPIRVDecompiler::Assign,
 
@@ -2389,7 +2407,7 @@ private:
         &SPIRVDecompiler::AtomicImageXor,
         &SPIRVDecompiler::AtomicImageExchange,
 
-        &SPIRVDecompiler::UAtomicAdd,
+        &SPIRVDecompiler::AtomicAdd,
 
         &SPIRVDecompiler::Branch,
         &SPIRVDecompiler::BranchIndirect,
@@ -2485,9 +2503,9 @@ private:
 
     Id t_smem_uint{};
 
-    const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
+    const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
     const Id t_gmem_array =
-        Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray");
+        Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
     const Id t_gmem_struct = MemberDecorate(
         Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
     const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
@@ -2508,6 +2526,7 @@ private:
     Id out_vertex{};
     Id in_vertex{};
     std::map<u32, Id> registers;
+    std::map<u32, Id> custom_variables;
     std::map<Tegra::Shader::Pred, Id> predicates;
     std::map<u32, Id> flow_variables;
     Id local_memory{};
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index a2f0044ba..cca13bcde 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -65,8 +65,8 @@ public:
     void DetachSegment(ASTNode start, ASTNode end);
     void Remove(ASTNode node);
 
-    ASTNode first{};
-    ASTNode last{};
+    ASTNode first;
+    ASTNode last;
 };
 
 class ASTProgram {
@@ -299,9 +299,9 @@ private:
     friend class ASTZipper;
 
     ASTData data;
-    ASTNode parent{};
-    ASTNode next{};
-    ASTNode previous{};
+    ASTNode parent;
+    ASTNode next;
+    ASTNode previous;
     ASTZipper* manager{};
 };
 
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
index a4a0319eb..0638be8cb 100644
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle
     return value;
 }
 
+std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
+    if (bound_buffer_saved) {
+        return bound_buffer;
+    }
+    if (!engine) {
+        return std::nullopt;
+    }
+    bound_buffer_saved = true;
+    bound_buffer = engine->GetBoundBuffer();
+    return bound_buffer;
+}
+
 void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
     keys.insert_or_assign({buffer, offset}, value);
 }
@@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes
     bindless_samplers.insert_or_assign({buffer, offset}, sampler);
 }
 
+void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
+    bound_buffer_saved = true;
+    bound_buffer = buffer;
+}
+
 bool ConstBufferLocker::IsConsistent() const {
     if (!engine) {
         return false;
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index d32e2d657..d3ea11087 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -10,6 +10,7 @@
 #include "common/hash.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
 
 namespace VideoCommon::Shader {
 
@@ -40,6 +41,8 @@ public:
 
     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
 
+    std::optional<u32> ObtainBoundBuffer();
+
     /// Inserts a key.
     void InsertKey(u32 buffer, u32 offset, u32 value);
 
@@ -49,6 +52,9 @@ public:
     /// Inserts a bindless sampler key.
     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
 
+    /// Set the bound buffer for this locker.
+    void SetBoundBuffer(u32 buffer);
+
     /// Checks keys and samplers against engine's current const buffers. Returns true if they are
     /// the same value, false otherwise;
     bool IsConsistent() const;
@@ -71,12 +77,27 @@ public:
         return bindless_samplers;
     }
 
+    /// Gets bound buffer used on this shader
+    u32 GetBoundBuffer() const {
+        return bound_buffer;
+    }
+
+    /// Obtains access to the guest driver's profile.
+    VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
+        if (engine) {
+            return &engine->AccessGuestDriverProfile();
+        }
+        return nullptr;
+    }
+
 private:
     const Tegra::Engines::ShaderType stage;
     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
     KeyMap keys;
     BoundSamplerMap bound_samplers;
     BindlessSamplerMap bindless_samplers;
+    bool bound_buffer_saved{};
+    u32 bound_buffer{};
 };
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 22c3e5120..6b697ed5d 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <cstring>
+#include <limits>
 #include <set>
 
 #include <fmt/format.h>
@@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
     return (absolute_offset % SchedPeriod) == 0;
 }
 
+void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
+                              const std::list<Sampler>& used_samplers) {
+    if (gpu_driver == nullptr) {
+        LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
+        return;
+    }
+    if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
+        return;
+    }
+    u32 count{};
+    std::vector<u32> bound_offsets;
+    for (const auto& sampler : used_samplers) {
+        if (sampler.IsBindless()) {
+            continue;
+        }
+        ++count;
+        bound_offsets.emplace_back(sampler.GetOffset());
+    }
+    if (count > 1) {
+        gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
+    }
+}
+
+std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
+                                        VideoCore::GuestDriverProfile* gpu_driver,
+                                        const std::list<Sampler>& used_samplers) {
+    if (gpu_driver == nullptr) {
+        LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
+        return std::nullopt;
+    }
+    const u32 base_offset = sampler_to_deduce.GetOffset();
+    u32 max_offset{std::numeric_limits<u32>::max()};
+    for (const auto& sampler : used_samplers) {
+        if (sampler.IsBindless()) {
+            continue;
+        }
+        if (sampler.GetOffset() > base_offset) {
+            max_offset = std::min(sampler.GetOffset(), max_offset);
+        }
+    }
+    if (max_offset == std::numeric_limits<u32>::max()) {
+        return std::nullopt;
+    }
+    return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
+}
+
 } // Anonymous namespace
 
 class ASTDecoder {
@@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
     return pc + 1;
 }
 
+void ShaderIR::PostDecode() {
+    // Deduce texture handler size if needed
+    auto gpu_driver = locker.AccessGuestDriverProfile();
+    DeduceTextureHandlerSize(gpu_driver, used_samplers);
+    // Deduce Indexed Samplers
+    if (!uses_indexed_samplers) {
+        return;
+    }
+    for (auto& sampler : used_samplers) {
+        if (!sampler.IsIndexed()) {
+            continue;
+        }
+        if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
+            sampler.SetSize(*size);
+        } else {
+            LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
+            sampler.SetSize(1);
+        }
+    }
+}
+
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 371fae127..e60875cc4 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -297,7 +297,7 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod
     const Node one = Immediate(1);
     const Node two = Immediate(2);
 
-    Node value{};
+    Node value;
     for (u32 i = 0; i < lop_iterations; ++i) {
         const Node shift_amount = Immediate(i);
 
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7591a715f..b5fbc4d58 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -19,9 +19,12 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
+using Tegra::Shader::GlobalAtomicOp;
+using Tegra::Shader::GlobalAtomicType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
+using Tegra::Shader::StoreType;
 
 namespace {
 
@@ -61,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
     }
 }
 
+Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
+    Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
+    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
+    return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
+                     Immediate(size));
+}
+
+Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
+    Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
+    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
+    return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
+                     std::move(offset), Immediate(size));
+}
+
+Node Sign16Extend(Node value) {
+    Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
+    Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
+    Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
+    return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
+}
+
 } // Anonymous namespace
 
 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -136,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
         [[fallthrough]];
     case OpCode::Id::LD_S: {
-        const auto GetMemory = [&](s32 offset) {
+        const auto GetAddress = [&](s32 offset) {
             ASSERT(offset % 4 == 0);
             const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
-            const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
-                                           immediate_offset);
-            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address)
-                                                             : GetLocalMemory(address);
+            return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
+        };
+        const auto GetMemory = [&](s32 offset) {
+            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
+                                                             : GetLocalMemory(GetAddress(offset));
         };
 
         switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bits32:
-        case Tegra::Shader::StoreType::Bits64:
-        case Tegra::Shader::StoreType::Bits128: {
-            const u32 count = [&]() {
+        case StoreType::Signed16:
+            SetRegister(bb, instr.gpr0,
+                        Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
+            break;
+        case StoreType::Bits32:
+        case StoreType::Bits64:
+        case StoreType::Bits128: {
+            const u32 count = [&] {
                 switch (instr.ldst_sl.type.Value()) {
-                case Tegra::Shader::StoreType::Bits32:
+                case StoreType::Bits32:
                     return 1;
-                case Tegra::Shader::StoreType::Bits64:
+                case StoreType::Bits64:
                     return 2;
-                case Tegra::Shader::StoreType::Bits128:
+                case StoreType::Bits128:
                     return 4;
                 default:
                     UNREACHABLE();
@@ -212,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             // To handle unaligned loads get the bytes used to dereference global memory and extract
             // those bytes from the loaded u32.
             if (IsUnaligned(type)) {
-                Node mask = Immediate(GetUnalignedMask(type));
-                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
-                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
-
-                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
-                                 std::move(offset), Immediate(size));
+                gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
             }
 
             SetTemporary(bb, i, gmem);
@@ -269,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
         };
 
-        const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L
-                                    ? &ShaderIR::SetLocalMemory
-                                    : &ShaderIR::SetSharedMemory;
+        const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
+        const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
+        const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
 
         switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bits128:
+        case StoreType::Bits128:
             (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
             (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
             [[fallthrough]];
-        case Tegra::Shader::StoreType::Bits64:
+        case StoreType::Bits64:
             (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
             [[fallthrough]];
-        case Tegra::Shader::StoreType::Bits32:
+        case StoreType::Bits32:
             (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
             break;
+        case StoreType::Signed16: {
+            Node address = GetAddress(0);
+            Node memory = (this->*get_memory)(address);
+            (this->*set_memory)(
+                bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
+            break;
+        }
         default:
             UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
                               static_cast<u32>(instr.ldst_sl.type.Value()));
@@ -323,18 +354,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             Node value = GetRegister(instr.gpr0.Value() + i);
 
             if (IsUnaligned(type)) {
-                Node mask = Immediate(GetUnalignedMask(type));
-                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
-                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
-
-                value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
-                                  Immediate(size));
+                const u32 mask = GetUnalignedMask(type);
+                value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
             }
 
             bb.push_back(Operation(OperationCode::Assign, gmem, value));
         }
         break;
     }
+    case OpCode::Id::ATOM: {
+        UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
+                             static_cast<int>(instr.atom.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
+                             static_cast<int>(instr.atom.type.Value()));
+
+        const auto [real_address, base_address, descriptor] =
+            TrackGlobalMemory(bb, instr, true, true);
+        if (!real_address || !base_address) {
+            // Tracking failed, skip atomic.
+            break;
+        }
+
+        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+        Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
+        SetRegister(bb, instr.gpr0, std::move(value));
+        break;
+    }
     case OpCode::Id::ATOMS: {
         UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
                              static_cast<int>(instr.atoms.operation.Value()));
@@ -348,7 +393,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         Node memory = GetSharedMemory(std::move(address));
         Node data = GetRegister(instr.gpr20);
 
-        Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
+        Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
         SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 7321698b2..4944e9d69 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -69,13 +69,16 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     case OpCode::Id::MOV_SYS: {
         const Node value = [this, instr] {
             switch (instr.sys20) {
+            case SystemVariable::LaneId:
+                LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
+                return Immediate(0U);
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
                 return Operation(OperationCode::YNegate);
             case SystemVariable::InvocationInfo:
                 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
-                return Immediate(0u);
+                return Immediate(0U);
             case SystemVariable::Tid: {
                 Node value = Immediate(0);
                 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
@@ -188,7 +191,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
                              static_cast<u32>(cc));
 
-        if (disable_flow_stack) {
+        if (decompiled) {
             break;
         }
 
@@ -200,7 +203,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
                              static_cast<u32>(cc));
-        if (disable_flow_stack) {
+        if (decompiled) {
             break;
         }
 
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b567e39d..351c8c2f1 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         Node4 values;
         for (u32 element = 0; element < values.size(); ++element) {
             auto coords_copy = coords;
-            MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
+            MetaTexture meta{sampler, {}, depth_compare, aoffi,   {}, {},
+                             {},      {}, component,     element, {}};
             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
         }
 
@@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const auto derivate_reg = instr.gpr20.Value();
         const auto texture_type = instr.txd.texture_type.Value();
         const auto coord_count = GetCoordCount(texture_type);
-
+        Node index_var{};
         const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}})
+            is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}})
                         : GetSampler(instr.sampler, {{texture_type, is_array, false}});
         Node4 values;
         if (sampler == nullptr) {
@@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
 
         for (u32 element = 0; element < values.size(); ++element) {
-            MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element};
+            MetaTexture meta{*sampler, array_node, {}, {},      {},       derivates,
+                             {},       {},         {}, element, index_var};
             values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
         }
 
@@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         // TODO: The new commits on the texture refactor, change the way samplers work.
         // Sadly, not all texture instructions specify the type of texture their sampler
         // uses. This must be fixed at a later instance.
+        Node index_var{};
         const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);
+            is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler);
 
         if (sampler == nullptr) {
             u32 indexer = 0;
@@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                 if (!instr.txq.IsComponentEnabled(element)) {
                     continue;
                 }
-                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
+                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
                 const Node value =
                     Operation(OperationCode::TextureQueryDimensions, meta,
                               GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
 
         auto texture_type = instr.tmml.texture_type.Value();
         const bool is_array = instr.tmml.array != 0;
+        Node index_var{};
         const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);
+            is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler);
 
         if (sampler == nullptr) {
             u32 indexer = 0;
@@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                 continue;
             }
             auto params = coords;
-            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
+            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
             SetTemporary(bb, indexer++, value);
         }
@@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
     // Otherwise create a new mapping for this sampler
     const auto next_index = static_cast<u32>(used_samplers.size());
     return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
-                                       info.is_buffer);
+                                       info.is_buffer, false);
 }
 
-const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
                                             std::optional<SamplerInfo> sampler_info) {
     const Node sampler_register = GetRegister(reg);
-    const auto [base_sampler, buffer, offset] =
-        TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT(base_sampler != nullptr);
-    if (base_sampler == nullptr) {
+    const auto [base_node, tracked_sampler_info] =
+        TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
+    ASSERT(base_node != nullptr);
+    if (base_node == nullptr) {
         return nullptr;
     }
 
-    const auto info = GetSamplerInfo(sampler_info, offset, buffer);
+    if (const auto bindless_sampler_info =
+            std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+        const u32 buffer = bindless_sampler_info->GetIndex();
+        const u32 offset = bindless_sampler_info->GetOffset();
+        const auto info = GetSamplerInfo(sampler_info, offset, buffer);
+
+        // If this sampler has already been used, return the existing mapping.
+        const auto it =
+            std::find_if(used_samplers.begin(), used_samplers.end(),
+                         [buffer = buffer, offset = offset](const Sampler& entry) {
+                             return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+                         });
+        if (it != used_samplers.end()) {
+            ASSERT(it->IsBindless() && it->GetType() == info.type &&
+                   it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow);
+            return &*it;
+        }
 
-    // If this sampler has already been used, return the existing mapping.
-    const auto it =
-        std::find_if(used_samplers.begin(), used_samplers.end(),
-                     [buffer = buffer, offset = offset](const Sampler& entry) {
-                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
-                     });
-    if (it != used_samplers.end()) {
-        ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
-               it->IsShadow() == info.is_shadow);
-        return &*it;
-    }
+        // Otherwise create a new mapping for this sampler
+        const auto next_index = static_cast<u32>(used_samplers.size());
+        return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
+                                           info.is_shadow, info.is_buffer, false);
+    } else if (const auto array_sampler_info =
+                   std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+        const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
+        index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
+        const auto info = GetSamplerInfo(sampler_info, base_offset);
+
+        // If this sampler has already been used, return the existing mapping.
+        const auto it = std::find_if(
+            used_samplers.begin(), used_samplers.end(),
+            [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; });
+        if (it != used_samplers.end()) {
+            ASSERT(!it->IsBindless() && it->GetType() == info.type &&
+                   it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow &&
+                   it->IsBuffer() == info.is_buffer && it->IsIndexed());
+            return &*it;
+        }
 
-    // Otherwise create a new mapping for this sampler
-    const auto next_index = static_cast<u32>(used_samplers.size());
-    return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
-                                       info.is_shadow, info.is_buffer);
+        uses_indexed_samplers = true;
+        // Otherwise create a new mapping for this sampler
+        const auto next_index = static_cast<u32>(used_samplers.size());
+        return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array,
+                                           info.is_shadow, info.is_buffer, true);
+    }
+    return nullptr;
 }
 
 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                          "This method is not supported.");
 
     const SamplerInfo info{texture_type, is_array, is_shadow, false};
-    const Sampler* sampler =
-        is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
+    Node index_var{};
+    const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info)
+                                         : GetSampler(instr.sampler, info);
     Node4 values;
     if (sampler == nullptr) {
         for (u32 element = 0; element < values.size(); ++element) {
@@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
 
     for (u32 element = 0; element < values.size(); ++element) {
         auto copy_coords = coords;
-        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
+        MetaTexture meta{*sampler, array, depth_compare, aoffi,    {}, {}, bias,
+                         lod,      {},    element,       index_var};
         values[element] = Operation(read_method, meta, std::move(copy_coords));
     }
 
@@ -596,7 +630,7 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
     }
 
-    Node dc{};
+    Node dc;
     if (depth_compare) {
         // Depth is always stored in the register signaled by gpr20 or in the next register if lod
         // or bias are used
@@ -632,7 +666,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 
     const Node array = is_array ? GetRegister(array_register) : nullptr;
 
-    Node dc{};
+    Node dc;
     if (depth_compare) {
         // Depth is always stored in the register signaled by gpr20 or in the next register if lod
         // or bias are used
@@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     u64 parameter_register = instr.gpr20.Value();
 
     const SamplerInfo info{texture_type, is_array, depth_compare, false};
-    const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
+    Node index_var{};
+    const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info)
                                          : GetSampler(instr.sampler, info);
     Node4 values;
     if (sampler == nullptr) {
@@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
         MetaTexture meta{
-            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
+            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
+            index_var};
         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
     }
 
@@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
     }
 
@@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
     }
     return values;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 075c7d07c..a0a7b9111 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,7 +162,7 @@ enum class OperationCode {
     AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
     AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
 
-    UAtomicAdd, /// (smem, uint) -> uint
+    AtomicAdd, /// (memory, {u}int) -> {u}int
 
     Branch,         /// (uint branch_target) -> void
     BranchIndirect, /// (uint branch_target) -> void
@@ -212,6 +212,7 @@ enum class MetaStackClass {
 class OperationNode;
 class ConditionalNode;
 class GprNode;
+class CustomVarNode;
 class ImmediateNode;
 class InternalFlagNode;
 class PredicateNode;
@@ -223,26 +224,32 @@ class SmemNode;
 class GmemNode;
 class CommentNode;
 
-using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
                               InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
                               LmemNode, SmemNode, GmemNode, CommentNode>;
 using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
 
+class BindlessSamplerNode;
+class ArraySamplerNode;
+
+using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSampler = std::shared_ptr<TrackSamplerData>;
+
 class Sampler {
 public:
     /// This constructor is for bound samplers
     constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
-                               bool is_array, bool is_shadow, bool is_buffer)
+                               bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
         : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
-          is_buffer{is_buffer} {}
+          is_buffer{is_buffer}, is_indexed{is_indexed} {}
 
     /// This constructor is for bindless samplers
     constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
-                               bool is_array, bool is_shadow, bool is_buffer)
+                               bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
-          is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {}
+          is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
 
     constexpr u32 GetIndex() const {
         return index;
@@ -276,16 +283,72 @@ public:
         return is_bindless;
     }
 
+    constexpr bool IsIndexed() const {
+        return is_indexed;
+    }
+
+    constexpr u32 Size() const {
+        return size;
+    }
+
+    constexpr void SetSize(u32 new_size) {
+        size = new_size;
+    }
+
 private:
     u32 index{};  ///< Emulated index given for the this sampler.
     u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
     u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+    u32 size{};   ///< Size of the sampler if indexed.
 
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
     bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
     bool is_buffer{};   ///< Whether the texture is a texture buffer without sampler.
     bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
+    bool is_indexed{};  ///< Whether this sampler is an indexed array of textures.
+};
+
+/// Represents a tracked bindless sampler into a direct const buffer
+class ArraySamplerNode final {
+public:
+    explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
+        : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
+
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+    constexpr u32 GetBaseOffset() const {
+        return base_offset;
+    }
+
+    constexpr u32 GetIndexVar() const {
+        return bindless_var;
+    }
+
+private:
+    u32 index;
+    u32 base_offset;
+    u32 bindless_var;
+};
+
+/// Represents a tracked bindless sampler into a direct const buffer
+class BindlessSamplerNode final {
+public:
+    explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
+
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+    constexpr u32 GetOffset() const {
+        return offset;
+    }
+
+private:
+    u32 index;
+    u32 offset;
 };
 
 class Image final {
@@ -380,8 +443,9 @@ struct MetaTexture {
     std::vector<Node> derivates;
     Node bias;
     Node lod;
-    Node component{};
+    Node component;
     u32 element{};
+    Node index;
 };
 
 struct MetaImage {
@@ -488,6 +552,19 @@ private:
     Tegra::Shader::Register index{};
 };
 
+/// A custom variable
+class CustomVarNode final {
+public:
+    explicit constexpr CustomVarNode(u32 index) : index{index} {}
+
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+private:
+    u32 index{};
+};
+
 /// A 32-bits value that represents an immediate value
 class ImmediateNode final {
 public:
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 0c2aa749b..11231bbea 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) {
     return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
 }
 
+template <typename T, typename... Args>
+TrackSampler MakeTrackSampler(Args&&... args) {
+    static_assert(std::is_convertible_v<T, TrackSamplerData>);
+    return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+}
+
 template <typename... Args>
 Node Operation(OperationCode code, Args&&... args) {
     if constexpr (sizeof...(args) == 0) {
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 31eecb3f4..3a5d280a9 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
                    ConstBufferLocker& locker)
     : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
     Decode();
+    PostDecode();
 }
 
 ShaderIR::~ShaderIR() = default;
@@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) {
     return MakeNode<GprNode>(reg);
 }
 
+Node ShaderIR::GetCustomVariable(u32 id) {
+    return MakeNode<CustomVarNode>(id);
+}
+
 Node ShaderIR::GetImmediate19(Instruction instr) {
     return Immediate(instr.alu.GetImm20_19());
 }
@@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) {
     return id;
 }
 
+u32 ShaderIR::NewCustomVariable() {
+    return num_custom_variables++;
+}
+
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ba1db4c11..b0851c3be 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -180,6 +180,10 @@ public:
         return amend_code[index];
     }
 
+    u32 GetNumCustomVariables() const {
+        return num_custom_variables;
+    }
+
 private:
     friend class ASTDecoder;
 
@@ -191,6 +195,7 @@ private:
     };
 
     void Decode();
+    void PostDecode();
 
     NodeBlock DecodeRange(u32 begin, u32 end);
     void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
@@ -235,6 +240,8 @@ private:
 
     /// Generates a node for a passed register.
     Node GetRegister(Tegra::Shader::Register reg);
+    /// Generates a node for a custom variable
+    Node GetCustomVariable(u32 id);
     /// Generates a node representing a 19-bit immediate value
     Node GetImmediate19(Tegra::Shader::Instruction instr);
     /// Generates a node representing a 32-bit immediate value
@@ -321,7 +328,7 @@ private:
                               std::optional<SamplerInfo> sampler_info = std::nullopt);
 
     /// Accesses a texture sampler for a bindless texture.
-    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg,
+    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
                                       std::optional<SamplerInfo> sampler_info = std::nullopt);
 
     /// Accesses an image.
@@ -387,6 +394,9 @@ private:
 
     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
+    std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                        s64 cursor);
+
     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
 
     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
@@ -399,6 +409,8 @@ private:
     /// Register new amending code and obtain the reference id.
     std::size_t DeclareAmend(Node new_amend);
 
+    u32 NewCustomVariable();
+
     const ProgramCode& program_code;
     const u32 main_offset;
     const CompilerSettings settings;
@@ -414,6 +426,7 @@ private:
     NodeBlock global_code;
     ASTManager program_manager{true, true};
     std::vector<Node> amend_code;
+    u32 num_custom_variables{};
 
     std::set<u32> used_registers;
     std::set<Tegra::Shader::Pred> used_predicates;
@@ -431,6 +444,7 @@ private:
     bool uses_instance_id{};
     bool uses_vertex_id{};
     bool uses_warps{};
+    bool uses_indexed_samplers{};
 
     Tegra::Shader::Header header;
 };
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 165c79330..face8c943 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -8,6 +8,7 @@
 
 #include "common/common_types.h"
 #include "video_core/shader/node.h"
+#include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace VideoCommon::Shader {
@@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
     }
     return {};
 }
+
+std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
+    if (operation.GetCode() != OperationCode::UAdd) {
+        return std::nullopt;
+    }
+    Node gpr;
+    Node offset;
+    ASSERT(operation.GetOperandsCount() == 2);
+    for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
+        Node operand = operation[i];
+        if (std::holds_alternative<ImmediateNode>(*operand)) {
+            offset = operation[i];
+        } else if (std::holds_alternative<GprNode>(*operand)) {
+            gpr = operation[i];
+        }
+    }
+    if (offset && gpr) {
+        return std::make_pair(gpr, offset);
+    }
+    return std::nullopt;
+}
+
+bool AmendNodeCv(std::size_t amend_index, Node node) {
+    if (const auto operation = std::get_if<OperationNode>(&*node)) {
+        operation->SetAmendIndex(amend_index);
+        return true;
+    } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+        conditional->SetAmendIndex(amend_index);
+        return true;
+    }
+    return false;
+}
+
 } // Anonymous namespace
 
+std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                              s64 cursor) {
+    if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+        // Constant buffer found, test if it's an immediate
+        const auto offset = cbuf->GetOffset();
+        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
+            auto track =
+                MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
+            return {tracked, track};
+        } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
+            auto bound_buffer = locker.ObtainBoundBuffer();
+            if (!bound_buffer) {
+                return {};
+            }
+            if (*bound_buffer != cbuf->GetIndex()) {
+                return {};
+            }
+            auto pair = DecoupleIndirectRead(*operation);
+            if (!pair) {
+                return {};
+            }
+            auto [gpr, base_offset] = *pair;
+            const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
+            auto gpu_driver = locker.AccessGuestDriverProfile();
+            if (gpu_driver == nullptr) {
+                return {};
+            }
+            const u32 bindless_cv = NewCustomVariable();
+            const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
+                                      Immediate(gpu_driver->GetTextureHandlerSize()));
+
+            const Node cv_node = GetCustomVariable(bindless_cv);
+            Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
+            const std::size_t amend_index = DeclareAmend(amend_op);
+            AmendNodeCv(amend_index, code[cursor]);
+            // TODO Implement Bindless Index custom variable
+            auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
+                                                            offset_inm->GetValue(), bindless_cv);
+            return {tracked, track};
+        }
+        return {};
+    }
+    if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
+        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
+            return {};
+        }
+        // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
+        // register that it uses as operand
+        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
+        if (!source) {
+            return {};
+        }
+        return TrackBindlessSampler(source, code, new_cursor);
+    }
+    if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
+        for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
+            if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
+                std::get<0>(found)) {
+                // Cbuf found in operand.
+                return found;
+            }
+        }
+        return {};
+    }
+    if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
+        const auto& conditional_code = conditional->GetCode();
+        return TrackBindlessSampler(tracked, conditional_code,
+                                    static_cast<s64>(conditional_code.size()));
+    }
+    return {};
+}
+
 std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
                                                s64 cursor) const {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 829268b4c..84469b7ba 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -135,7 +135,7 @@ std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& i
         for (u32 level = 0; level < mipmaps; level++) {
             const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
             const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
-            result.emplace_back(width, height, layer, level);
+            result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
         }
     }
     return result;
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 8e947394c..a5f81a8a0 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,19 +3,32 @@
 // Refer to the license.txt file included.
 
 #include <memory>
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "video_core/gpu_asynch.h"
 #include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
+#ifdef HAS_VULKAN
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#endif
 #include "video_core/video_core.h"
 
 namespace VideoCore {
 
 std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
                                              Core::System& system) {
-    return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
+    switch (Settings::values.renderer_backend) {
+    case Settings::RendererBackend::OpenGL:
+        return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
+#ifdef HAS_VULKAN
+    case Settings::RendererBackend::Vulkan:
+        return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
+#endif
+    default:
+        return nullptr;
+    }
 }
 
 std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {