68 files changed, 3538 insertions, 1094 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 91df062d7..f7febd6a2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -148,6 +148,7 @@ add_library(video_core STATIC
     textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
+    textures/texture.cpp
     textures/texture.h
     video_core.cpp
     video_core.h
@@ -210,6 +211,8 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_texture_cache.h
         renderer_vulkan/vk_update_descriptor.cpp
         renderer_vulkan/vk_update_descriptor.h
+        renderer_vulkan/wrapper.cpp
+        renderer_vulkan/wrapper.h
     )
 
     target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index 4b9193182..e35ee0b67 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,37 +15,29 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start); // half-open range intersection test
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
-        return cache_addr <= other_start && other_end <= cache_addr_end;
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+        return cpu_addr <= other_start && other_end <= cpu_addr_end; // other range fits in block
     }
 
-    u8* GetWritableHostPtr() const {
-        return FromCacheAddr(cache_addr);
+    std::size_t GetOffset(const VAddr in_addr) const {
+        return static_cast<std::size_t>(in_addr - cpu_addr); // distance from cpu_addr to in_addr
     }
 
-    u8* GetWritableHostPtr(std::size_t offset) const {
-        return FromCacheAddr(cache_addr + offset);
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
     }
 
-    std::size_t GetOffset(const CacheAddr in_addr) {
-        return static_cast<std::size_t>(in_addr - cache_addr);
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
     }
 
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
-    }
-
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
-    }
-
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + size;
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + size; // exclusive end; derived from the current size
     }
 
     std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
     }
 
 protected:
-    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
-        SetCacheAddr(cache_addr);
+    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
+        SetCpuAddr(cpu_addr);
     }
     ~BufferBlock() = default;
 
 private:
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
+    VAddr cpu_addr{};
+    VAddr cpu_addr_end{};
     std::size_t size{};
     u64 epoch{};
 };
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 186aca61d..b57c0d4d4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -19,6 +19,7 @@
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "core/core.h"
+#include "core/memory.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -37,28 +38,45 @@ public:
                             bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};
 
-        auto& memory_manager = system.GPU().MemoryManager();
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
+        const std::optional<VAddr> cpu_addr_opt =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+
+        if (!cpu_addr_opt) {
             return {GetEmptyBuffer(size), 0};
         }
-        const auto cache_addr = ToCacheAddr(host_ptr);
+
+        VAddr cpu_addr = *cpu_addr_opt;
 
         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
+            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
+                auto& memory_manager = system.GPU().MemoryManager();
                 if (use_fast_cbuf) {
-                    return ConstBufferUpload(host_ptr, size);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return ConstBufferUpload(host_ptr, size);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return ConstBufferUpload(staging_buffer.data(), size);
+                    }
                 } else {
-                    return StreamBufferUpload(host_ptr, size, alignment);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return StreamBufferUpload(host_ptr, size, alignment);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return StreamBufferUpload(staging_buffer.data(), size, alignment);
+                    }
                 }
             }
         }
 
-        auto block = GetBlock(cache_addr, size);
-        auto map = MapAddress(block, gpu_addr, cache_addr, size);
+        auto block = GetBlock(cpu_addr, size);
+        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
             if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
             }
         }
 
-        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
+        const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
 
         return {ToHandle(block), offset};
     }
@@ -112,7 +130,7 @@ public:
     }
 
     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
     }
 
     /// Mark the specified region as being invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size) {
+    void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -152,7 +170,7 @@ protected:
 
     virtual void WriteBarrier() = 0;
 
-    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
+    virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
 
     virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                  const u8* data) = 0;
@@ -169,20 +187,17 @@ protected:
 
     /// Register an object into the cache
     void Register(const MapInterval& new_map, bool inherit_written = false) {
-        const CacheAddr cache_ptr = new_map->GetStart();
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
-        if (!cache_ptr || !cpu_addr) {
+        const VAddr cpu_addr = new_map->GetStart();
+        if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
                          new_map->GetGpuAddress());
             return;
         }
         const std::size_t size = new_map->GetEnd() - new_map->GetStart();
-        new_map->SetCpuAddress(*cpu_addr);
         new_map->MarkAsRegistered(true);
         const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
         mapped_addresses.insert({interval, new_map});
-        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
         if (inherit_written) {
             MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
             new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
     /// Unregisters an object from the cache
     void Unregister(MapInterval& map) {
         const std::size_t size = map->GetEnd() - map->GetStart();
-        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
+        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
         map->MarkAsRegistered(false);
         if (map->IsWritten()) {
             UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -202,32 +217,39 @@ protected:
     }
 
 private:
-    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
+    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
         return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
     }
 
-    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
-                           const CacheAddr cache_addr, const std::size_t size) {
+    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
+                           const std::size_t size) {
 
-        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
+        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
-            const CacheAddr cache_addr_end = cache_addr + size;
-            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
-            u8* host_ptr = FromCacheAddr(cache_addr);
-            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
+            auto& memory_manager = system.GPU().MemoryManager();
+            const VAddr cpu_addr_end = cpu_addr + size;
+            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
+            if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+            } else {
+                staging_buffer.resize(size);
+                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+            }
             Register(new_map);
             return new_map;
         }
 
-        const CacheAddr cache_addr_end = cache_addr + size;
+        const VAddr cpu_addr_end = cpu_addr + size;
         if (overlaps.size() == 1) {
             MapInterval& current_map = overlaps[0];
-            if (current_map->IsInside(cache_addr, cache_addr_end)) {
+            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
                 return current_map;
             }
         }
-        CacheAddr new_start = cache_addr;
-        CacheAddr new_end = cache_addr_end;
+        VAddr new_start = cpu_addr;
+        VAddr new_end = cpu_addr_end;
         bool write_inheritance = false;
         bool modified_inheritance = false;
         // Calculate new buffer parameters
@@ -237,7 +259,7 @@ private:
             write_inheritance |= overlap->IsWritten();
             modified_inheritance |= overlap->IsModified();
         }
-        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
+        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
         for (auto& overlap : overlaps) {
             Unregister(overlap);
         }
@@ -250,7 +272,7 @@ private:
         return new_map;
     }
 
-    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
+    void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
                      std::vector<MapInterval>& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
         for (auto& interval : interval_set) {
             std::size_t size = interval.upper() - interval.lower();
             if (size > 0) {
-                u8* host_ptr = FromCacheAddr(interval.lower());
-                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
+                staging_buffer.resize(size);
+                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(interval.lower()), size,
+                                staging_buffer.data());
             }
         }
     }
 
-    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
+    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
         if (size == 0) {
             return {};
         }
@@ -290,8 +314,9 @@ private:
     void FlushMap(MapInterval map) {
         std::size_t size = map->GetEnd() - map->GetStart();
         TBuffer block = blocks[map->GetStart() >> block_page_bits];
-        u8* host_ptr = FromCacheAddr(map->GetStart());
-        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
+        staging_buffer.resize(size); // download lands here, then is copied via system.Memory()
+        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
+        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }
 
@@ -316,14 +341,14 @@ private:
     TBuffer EnlargeBlock(TBuffer buffer) {
         const std::size_t old_size = buffer->GetSize();
         const std::size_t new_size = old_size + block_page_size;
-        const CacheAddr cache_addr = buffer->GetCacheAddr();
-        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
+        const VAddr cpu_addr = buffer->GetCpuAddr();
+        TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
         CopyBlock(buffer, new_buffer, 0, 0, old_size);
         buffer->SetEpoch(epoch);
         pending_destruction.push_back(buffer);
-        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -334,9 +359,9 @@ private:
     TBuffer MergeBlocks(TBuffer first, TBuffer second) {
         const std::size_t size_1 = first->GetSize();
         const std::size_t size_2 = second->GetSize();
-        const CacheAddr first_addr = first->GetCacheAddr();
-        const CacheAddr second_addr = second->GetCacheAddr();
-        const CacheAddr new_addr = std::min(first_addr, second_addr);
+        const VAddr first_addr = first->GetCpuAddr();
+        const VAddr second_addr = second->GetCpuAddr();
+        const VAddr new_addr = std::min(first_addr, second_addr);
         const std::size_t new_size = size_1 + size_2;
         TBuffer new_buffer = CreateBlock(new_addr, new_size);
         CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
         second->SetEpoch(epoch);
         pending_destruction.push_back(first);
         pending_destruction.push_back(second);
-        const CacheAddr cache_addr_end = new_addr + new_size - 1;
+        const VAddr cpu_addr_end = new_addr + new_size - 1;
         u64 page_start = new_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -355,18 +380,18 @@ private:
         return new_buffer;
     }
 
-    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
+    TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
         TBuffer found{};
-        const CacheAddr cache_addr_end = cache_addr + size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             auto it = blocks.find(page_start);
             if (it == blocks.end()) {
                 if (found) {
                     found = EnlargeBlock(found);
                 } else {
-                    const CacheAddr start_addr = (page_start << block_page_bits);
+                    const VAddr start_addr = (page_start << block_page_bits);
                     found = CreateBlock(start_addr, block_page_size);
                     blocks[page_start] = found;
                 }
@@ -386,7 +411,7 @@ private:
         return found;
     }
 
-    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
         }
     }
 
-    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -416,7 +441,7 @@ private:
         }
     }
 
-    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
+    bool IsRegionWritten(const VAddr start, const VAddr end) const {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -440,8 +465,8 @@ private:
     u64 buffer_offset = 0;
     u64 buffer_offset_base = 0;
 
-    using IntervalSet = boost::icl::interval_set<CacheAddr>;
-    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
+    using IntervalSet = boost::icl::interval_set<VAddr>;
+    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
     using IntervalType = typename IntervalCache::interval_type;
     IntervalCache mapped_addresses;
 
@@ -456,6 +481,8 @@ private:
     u64 epoch = 0;
     u64 modified_ticks = 0;
 
+    std::vector<u8> staging_buffer;
+
     std::recursive_mutex mutex;
 };
 
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 3a104d5cd..b0956029d 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -11,7 +11,7 @@ namespace VideoCommon {
 
 class MapIntervalBase {
 public:
-    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
+    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
         : start{start}, end{end}, gpu_addr{gpu_addr} {}
 
     void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
         return gpu_addr;
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
         return (start <= other_start && other_end <= end);
     }
 
@@ -46,11 +46,11 @@ public:
         return is_registered;
     }
 
-    CacheAddr GetStart() const {
+    VAddr GetStart() const {
         return start;
     }
 
-    CacheAddr GetEnd() const {
+    VAddr GetEnd() const {
         return end;
     }
 
@@ -76,8 +76,8 @@ public:
     }
 
 private:
-    CacheAddr start;
-    CacheAddr end;
+    VAddr start;
+    VAddr end;
     GPUVAddr gpu_addr;
     VAddr cpu_addr{};
     bool is_written{};
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d24c9f657..4637ddabd 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -312,6 +312,35 @@ public:
             }
         };
 
+        struct MsaaSampleLocation { // four (x, y) pairs packed as 4-bit fields of a u32
+            union {
+                BitField<0, 4, u32> x0;
+                BitField<4, 4, u32> y0;
+                BitField<8, 4, u32> x1;
+                BitField<12, 4, u32> y1;
+                BitField<16, 4, u32> x2;
+                BitField<20, 4, u32> y2;
+                BitField<24, 4, u32> x3;
+                BitField<28, 4, u32> y3;
+            };
+
+            constexpr std::pair<u32, u32> Location(int index) const { // {x, y} for sample index
+                switch (index) {
+                case 0:
+                    return {x0, y0};
+                case 1:
+                    return {x1, y1};
+                case 2:
+                    return {x2, y2};
+                case 3:
+                    return {x3, y3};
+                default:
+                    UNREACHABLE(); // only indices 0-3 have fields in this word
+                    return {0, 0};
+                }
+            }
+        };
+
         enum class DepthMode : u32 {
             MinusOneToOne = 0,
             ZeroToOne = 1,
@@ -793,7 +822,13 @@ public:
 
                 u32 rt_separate_frag_data;
 
-                INSERT_UNION_PADDING_WORDS(0xC);
+                INSERT_UNION_PADDING_WORDS(0x1);
+
+                u32 multisample_raster_enable;
+                u32 multisample_raster_samples;
+                std::array<u32, 4> multisample_sample_mask;
+
+                INSERT_UNION_PADDING_WORDS(0x5);
 
                 struct {
                     u32 address_high;
@@ -830,7 +865,16 @@ public:
 
                 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
 
-                INSERT_UNION_PADDING_WORDS(0xF);
+                std::array<MsaaSampleLocation, 4> multisample_sample_locations;
+
+                INSERT_UNION_PADDING_WORDS(0x2);
+
+                union {
+                    BitField<0, 1, u32> enable;
+                    BitField<4, 3, u32> target;
+                } multisample_coverage_to_color;
+
+                INSERT_UNION_PADDING_WORDS(0x8);
 
                 struct {
                     union {
@@ -943,7 +987,7 @@ public:
 
                 CounterReset counter_reset;
 
-                INSERT_UNION_PADDING_WORDS(0x1);
+                u32 multisample_enable;
 
                 u32 zeta_enable;
 
@@ -1007,7 +1051,11 @@ public:
 
                 float polygon_offset_units;
 
-                INSERT_UNION_PADDING_WORDS(0x11);
+                INSERT_UNION_PADDING_WORDS(0x4);
+
+                Tegra::Texture::MsaaMode multisample_mode;
+
+                INSERT_UNION_PADDING_WORDS(0xC);
 
                 union {
                     BitField<2, 1, u32> coord_origin;
@@ -1507,12 +1555,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
 ASSERT_REG_POSITION(color_mask_common, 0x3E4);
-ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(depth_bounds, 0x3E7);
+ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
+ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
+ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
+ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(clear_flags, 0x43E);
 ASSERT_REG_POSITION(fill_rectangle, 0x44F);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
+ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
+ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
 ASSERT_REG_POSITION(rt_control, 0x487);
 ASSERT_REG_POSITION(zeta_width, 0x48a);
 ASSERT_REG_POSITION(zeta_height, 0x48b);
@@ -1545,11 +1598,12 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545);
 ASSERT_REG_POSITION(point_size, 0x546);
 ASSERT_REG_POSITION(point_sprite_enable, 0x548);
 ASSERT_REG_POSITION(counter_reset, 0x54C);
+ASSERT_REG_POSITION(multisample_enable, 0x54D);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(multisample_control, 0x54F);
 ASSERT_REG_POSITION(condition, 0x554);
 ASSERT_REG_POSITION(tsc, 0x557);
-ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
+ASSERT_REG_POSITION(polygon_offset_factor, 0x55B);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
 ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
@@ -1558,6 +1612,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
 ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
 ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
 ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
+ASSERT_REG_POSITION(multisample_mode, 0x574);
 ASSERT_REG_POSITION(point_coord_replace, 0x581);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7400e1aa9..c66c66f6c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -231,18 +231,6 @@ enum class AtomicOp : u64 {
     Or = 6,
     Xor = 7,
     Exch = 8,
-};
-
-enum class GlobalAtomicOp : u64 {
-    Add = 0,
-    Min = 1,
-    Max = 2,
-    Inc = 3,
-    Dec = 4,
-    And = 5,
-    Or = 6,
-    Xor = 7,
-    Exch = 8,
     SafeAdd = 10,
 };
 
@@ -1018,7 +1006,7 @@ union Instruction {
     } stg;
 
     union {
-        BitField<52, 4, GlobalAtomicOp> operation;
+        BitField<52, 4, AtomicOp> operation;
         BitField<49, 3, GlobalAtomicType> type;
         BitField<28, 20, s64> offset;
     } atom;
@@ -1777,6 +1765,7 @@ public:
         BRK,
         DEPBAR,
         VOTE,
+        VOTE_VTG,
         SHFL,
         FSWZADD,
         BFE_C,
@@ -1823,6 +1812,7 @@ public:
         IPA,
         OUT_R, // Emit vertex/primitive
         ISBERD,
+        BAR,
         MEMBAR,
         VMAD,
         VSETP,
@@ -1908,7 +1898,7 @@ public:
         MOV_C,
         MOV_R,
         MOV_IMM,
-        MOV_SYS,
+        S2R,
         MOV32_IMM,
         SHL_C,
         SHL_R,
@@ -2092,6 +2082,7 @@ private:
             INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
             INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
             INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
+            INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
             INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
             INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
@@ -2129,6 +2120,7 @@ private:
             INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
             INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
             INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
             INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
             INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
             INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
@@ -2201,7 +2193,7 @@ private:
             INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
             INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
             INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
-            INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"),
+            INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
             INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
             INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
             INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index bc80661d8..72e2a33d5 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -4,6 +4,9 @@
 
 #pragma once
 
+#include <array>
+#include <optional>
+
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -16,7 +19,7 @@ enum class OutputTopology : u32 {
     TriangleStrip = 7,
 };
 
-enum class AttributeUse : u8 {
+enum class PixelImap : u8 {
     Unused = 0,
     Constant = 1,
     Perspective = 2,
@@ -24,7 +27,7 @@ enum class AttributeUse : u8 {
 };
 
 // Documentation in:
-// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
 struct Header {
     union {
         BitField<0, 5, u32> sph_type;
@@ -59,8 +62,8 @@ struct Header {
     union {
         BitField<0, 12, u32> max_output_vertices;
         BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
-        BitField<24, 4, u32> reserved;
-        BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+        BitField<20, 4, u32> reserved;
+        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
     } common4{};
 
     union {
@@ -93,17 +96,20 @@ struct Header {
         struct {
             INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
             INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
+
             union {
-                BitField<0, 2, AttributeUse> x;
-                BitField<2, 2, AttributeUse> y;
-                BitField<4, 2, AttributeUse> w;
-                BitField<6, 2, AttributeUse> z;
+                BitField<0, 2, PixelImap> x;
+                BitField<2, 2, PixelImap> y;
+                BitField<4, 2, PixelImap> z;
+                BitField<6, 2, PixelImap> w;
                 u8 raw;
             } imap_generic_vector[32];
+
             INSERT_UNION_PADDING_BYTES(2);  // ImapColor
             INSERT_UNION_PADDING_BYTES(2);  // ImapSystemValuesC
             INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
             INSERT_UNION_PADDING_BYTES(2);  // ImapReserved
+
             struct {
                 u32 target;
                 union {
@@ -112,31 +118,30 @@ struct Header {
                     BitField<2, 30, u32> reserved;
                 };
             } omap;
+
             bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
                 const u32 bit = render_target * 4 + component;
                 return omap.target & (1 << bit);
             }
-            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
-                return static_cast<AttributeUse>(
-                    (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
-            }
-            AttributeUse GetAttributeUse(u32 attribute) const {
-                AttributeUse result = AttributeUse::Unused;
-                for (u32 i = 0; i < 4; i++) {
-                    const auto index = GetAttributeIndexUse(attribute, i);
-                    if (index == AttributeUse::Unused) {
-                        continue;
-                    }
-                    if (result == AttributeUse::Unused || result == index) {
-                        result = index;
+
+            PixelImap GetPixelImap(u32 attribute) const {
+                const auto get_index = [this, attribute](u32 index) {
+                    return static_cast<PixelImap>(
+                        (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
+                };
+
+                std::optional<PixelImap> result;
+                for (u32 component = 0; component < 4; ++component) {
+                    const PixelImap index = get_index(component);
+                    if (index == PixelImap::Unused) {
                         continue;
                     }
-                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
-                    if (index == AttributeUse::Perspective) {
-                        result = index;
+                    if (result && result != index) {
+                        LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
                     }
+                    result = index;
                 }
-                return result;
+                return result.value_or(PixelImap::Unused);
             }
         } ps;
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e8f763ce9..8acf2eda2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -7,6 +7,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/frontend/emu_window.h"
 #include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -16,14 +17,15 @@
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
+#include "video_core/video_core.h"
 
 namespace Tegra {
 
 MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
-GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
-    : system{system}, renderer{renderer}, is_async{is_async} {
-    auto& rasterizer{renderer.Rasterizer()};
+GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
+    : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
+    auto& rasterizer{renderer->Rasterizer()};
     memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
     dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
     maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
@@ -137,7 +139,7 @@ u64 GPU::GetTicks() const {
 }
 
 void GPU::FlushCommands() {
-    renderer.Rasterizer().FlushCommands();
+    renderer->Rasterizer().FlushCommands();
 }
 
 // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 64acb17df..1a2d747be 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -25,8 +25,11 @@ inline u8* FromCacheAddr(CacheAddr cache_addr) {
 }
 
 namespace Core {
-class System;
+namespace Frontend {
+class EmuWindow;
 }
+class System;
+} // namespace Core
 
 namespace VideoCore {
 class RendererBase;
@@ -129,7 +132,8 @@ class MemoryManager;
 
 class GPU {
 public:
-    explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async);
+    explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
+                 bool is_async);
 
     virtual ~GPU();
 
@@ -174,6 +178,14 @@ public:
     /// Returns a reference to the GPU DMA pusher.
     Tegra::DmaPusher& DmaPusher();
 
+    VideoCore::RendererBase& Renderer() {
+        return *renderer;
+    }
+
+    const VideoCore::RendererBase& Renderer() const {
+        return *renderer;
+    }
+
     // Waits for the GPU to finish working
     virtual void WaitIdle() const = 0;
 
@@ -258,13 +270,13 @@ public:
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 
 protected:
     virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
@@ -287,7 +299,7 @@ private:
 protected:
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     Core::System& system;
-    VideoCore::RendererBase& renderer;
+    std::unique_ptr<VideoCore::RendererBase> renderer;
 
 private:
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 04222d060..cc434faf7 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,13 +10,16 @@
 
 namespace VideoCommon {
 
-GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
-    : GPU(system, renderer, true), gpu_thread{system} {}
+GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
+                     std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
+    : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)),
+      cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {}
 
 GPUAsynch::~GPUAsynch() = default;
 
 void GPUAsynch::Start() {
-    gpu_thread.StartThread(renderer, *dma_pusher);
+    cpu_context->MakeCurrent();
+    gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
 }
 
 void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
@@ -27,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     gpu_thread.SwapBuffers(framebuffer);
 }
 
-void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
     gpu_thread.FlushRegion(addr, size);
 }
 
-void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.InvalidateRegion(addr, size);
 }
 
-void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 1241ade1d..03fd0eef0 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -7,6 +7,10 @@
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
 
+namespace Core::Frontend {
+class GraphicsContext;
+}
+
 namespace VideoCore {
 class RendererBase;
 } // namespace VideoCore
@@ -16,15 +20,16 @@ namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU asynchronously
 class GPUAsynch final : public Tegra::GPU {
 public:
-    explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
+    explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
+                       std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
     ~GPUAsynch() override;
 
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override;
 
 protected:
@@ -32,6 +37,8 @@ protected:
 
 private:
     GPUThread::ThreadManager gpu_thread;
+    std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context; // Same order as ctor init list
+    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index d48221077..6f38a672a 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,12 +7,15 @@
 
 namespace VideoCommon {
 
-GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
-    : GPU(system, renderer, false) {}
+GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
+                   std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
+    : GPU(system, std::move(renderer), false), context{std::move(context)} {}
 
 GPUSynch::~GPUSynch() = default;
 
-void GPUSynch::Start() {}
+void GPUSynch::Start() {
+    context->MakeCurrent();
+}
 
 void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
     dma_pusher->Push(std::move(entries));
@@ -20,19 +23,19 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
 }
 
 void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
-    renderer.SwapBuffers(framebuffer);
+    renderer->SwapBuffers(framebuffer);
 }
 
-void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
-    renderer.Rasterizer().FlushRegion(addr, size);
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
+    renderer->Rasterizer().FlushRegion(addr, size);
 }
 
-void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
-    renderer.Rasterizer().InvalidateRegion(addr, size);
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
+    renderer->Rasterizer().InvalidateRegion(addr, size);
 }
 
-void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
-    renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 }
 
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index c71baee89..4a6e9a01d 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -6,6 +6,10 @@
 
 #include "video_core/gpu.h"
 
+namespace Core::Frontend {
+class GraphicsContext;
+}
+
 namespace VideoCore {
 class RendererBase;
 } // namespace VideoCore
@@ -15,20 +19,24 @@ namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU synchronously
 class GPUSynch final : public Tegra::GPU {
 public:
-    explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
+    explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
+                      std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
     ~GPUSynch() override;
 
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override {}
 
 protected:
     void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
                              [[maybe_unused]] u32 value) const override {}
+
+private:
+    std::unique_ptr<Core::Frontend::GraphicsContext> context;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index b1088af3d..10cda686b 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,7 @@
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "core/core.h"
-#include "core/frontend/scope_acquire_context.h"
+#include "core/frontend/emu_window.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -14,8 +14,8 @@
 namespace VideoCommon::GPUThread {
 
 /// Runs the GPU thread
-static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
-                      SynchState& state) {
+static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
+                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
     MicroProfileOnThreadCreate("GpuThread");
 
     // Wait for first GPU command before acquiring the window context
@@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
         return;
     }
 
-    Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()};
+    auto current_context = context.Acquire();
 
     CommandDataContainer next;
     while (state.is_running) {
@@ -62,8 +62,11 @@ ThreadManager::~ThreadManager() {
     thread.join();
 }
 
-void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
-    thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
+void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
+                                Core::Frontend::GraphicsContext& context,
+                                Tegra::DmaPusher& dma_pusher) {
+    thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
+                         std::ref(state)};
 }
 
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -74,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 }
 
-void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
     PushCommand(FlushRegionCommand(addr, size));
 }
 
-void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
     system.Renderer().Rasterizer().InvalidateRegion(addr, size);
 }
 
-void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
     InvalidateRegion(addr, size);
 }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 882e2d9c7..cd74ad330 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,7 +10,6 @@
 #include <optional>
 #include <thread>
 #include <variant>
-
 #include "common/threadsafe_queue.h"
 #include "video_core/gpu.h"
 
@@ -20,6 +19,9 @@ class DmaPusher;
 } // namespace Tegra
 
 namespace Core {
+namespace Frontend {
+class GraphicsContext;
+}
 class System;
 } // namespace Core
 
@@ -45,26 +47,26 @@ struct SwapBuffersCommand final {
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
         : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
@@ -99,7 +101,8 @@ public:
     ~ThreadManager();
 
     /// Creates and starts the GPU thread.
-    void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+    void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
+                     Tegra::DmaPusher& dma_pusher);
 
     /// Push GPU command entries to be processed
     void SubmitList(Tegra::CommandList&& entries);
@@ -108,13 +111,13 @@ public:
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(CacheAddr addr, u64 size);
+    void FlushRegion(VAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size);
+    void InvalidateRegion(VAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
 
     // Wait until the gpu thread is idle.
     void WaitIdle() const;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f5d33f27a..a3389d0d2 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
     ASSERT((gpu_addr & page_mask) == 0);
 
     const u64 aligned_size{Common::AlignUp(size, page_size)};
-    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
     const auto cpu_addr = GpuToCpuAddress(gpu_addr);
     ASSERT(cpu_addr);
 
     // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
+    system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
 
     UnmapRange(gpu_addr, aligned_size);
     ASSERT(system.CurrentProcess()
@@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
         return {};
     }
 
-    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    const u8* page_pointer{GetPointer(addr)};
     if (page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
         T value;
-        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+        std::memcpy(&value, page_pointer, sizeof(T));
         return value;
     }
 
@@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
         return;
     }
 
-    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    u8* page_pointer{GetPointer(addr)};
     if (page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
-        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+        std::memcpy(page_pointer, &data, sizeof(T));
         return;
     }
 
@@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
         return {};
     }
 
-    u8* const page_pointer{page_table.pointers[addr >> page_bits]};
-    if (page_pointer != nullptr) {
-        return page_pointer + (addr & page_mask);
+    auto& memory = system.Memory();
+
+    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
+
+    if (page_addr != 0) {
+        return memory.GetPointer(page_addr + (addr & page_mask));
     }
 
     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
         return {};
     }
 
-    const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
-    if (page_pointer != nullptr) {
-        return page_pointer + (addr & page_mask);
+    const auto& memory = system.Memory();
+
+    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
+
+    if (page_addr != 0) {
+        return memory.GetPointer(page_addr + (addr & page_mask));
     }
 
     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 
         switch (page_table.attributes[page_index]) {
         case Common::PageType::Memory: {
-            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+            const VAddr cpu_src_addr{page_table.backing_addr[page_index] + page_offset};
             // Flush must happen on the rasterizer interface, such that memory is always synchronous
             // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
-            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
-            std::memcpy(dest_buffer, src_ptr, copy_amount);
+            rasterizer.FlushRegion(cpu_src_addr, copy_amount);
+            memory.ReadBlockUnsafe(cpu_src_addr, dest_buffer, copy_amount);
             break;
         }
         default:
@@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
         const u8* page_pointer = page_table.pointers[page_index];
         if (page_pointer) {
-            const u8* src_ptr{page_pointer + page_offset};
-            std::memcpy(dest_buffer, src_ptr, copy_amount);
+            const VAddr cpu_src_addr{page_table.backing_addr[page_index] + page_offset};
+            memory.ReadBlockUnsafe(cpu_src_addr, dest_buffer, copy_amount);
         } else {
             std::memset(dest_buffer, 0, copy_amount);
         }
@@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
     std::size_t page_index{dest_addr >> page_bits};
     std::size_t page_offset{dest_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 
         switch (page_table.attributes[page_index]) {
         case Common::PageType::Memory: {
-            u8* dest_ptr{page_table.pointers[page_index] + page_offset};
+            const VAddr cpu_dest_addr{page_table.backing_addr[page_index] + page_offset};
             // Invalidate must happen on the rasterizer interface, such that memory is always
             // synchronous when it is written (even when in asynchronous GPU mode).
-            rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
-            std::memcpy(dest_ptr, src_buffer, copy_amount);
+            rasterizer.InvalidateRegion(cpu_dest_addr, copy_amount);
+            memory.WriteBlockUnsafe(cpu_dest_addr, src_buffer, copy_amount);
             break;
         }
         default:
@@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
     std::size_t page_index{dest_addr >> page_bits};
     std::size_t page_offset{dest_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
         u8* page_pointer = page_table.pointers[page_index];
         if (page_pointer) {
-            u8* dest_ptr{page_pointer + page_offset};
-            std::memcpy(dest_ptr, src_buffer, copy_amount);
+            const VAddr cpu_dest_addr{page_table.backing_addr[page_index] + page_offset};
+            memory.WriteBlockUnsafe(cpu_dest_addr, src_buffer, copy_amount);
         }
         page_index++;
         page_offset = 0;
@@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
 }
 
 void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
-    std::size_t remaining_size{size};
-    std::size_t page_index{src_addr >> page_bits};
-    std::size_t page_offset{src_addr & page_mask};
-
-    while (remaining_size > 0) {
-        const std::size_t copy_amount{
-            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-
-        switch (page_table.attributes[page_index]) {
-        case Common::PageType::Memory: {
-            // Flush must happen on the rasterizer interface, such that memory is always synchronous
-            // when it is copied (even when in asynchronous GPU mode).
-            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
-            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
-            WriteBlock(dest_addr, src_ptr, copy_amount);
-            break;
-        }
-        default:
-            UNREACHABLE();
-        }
-
-        page_index++;
-        page_offset = 0;
-        dest_addr += static_cast<VAddr>(copy_amount);
-        src_addr += static_cast<VAddr>(copy_amount);
-        remaining_size -= copy_amount;
-    }
+    std::vector<u8> tmp_buffer(size);
+    ReadBlock(src_addr, tmp_buffer.data(), size);
+    WriteBlock(dest_addr, tmp_buffer.data(), size);
 }
 
 void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
@@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
     WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
 }
 
+bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
+    // Include gpu_addr's in-page offset (as GetPointer does) when resolving the CPU address.
+    const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits] + (gpu_addr & page_mask);
+    return (addr & Memory::PAGE_MASK) + size <= Memory::PAGE_SIZE;
+}
+
 void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
                              VAddr backing_addr) {
     LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 073bdb491..0d9468535 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -97,6 +97,11 @@ public:
     void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
     void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
+    /**
+     * IsGranularRange checks if a gpu region can be simply read with a pointer
+     */
+    bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
+
 private:
     using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
     using VMAHandle = VMAMap::const_iterator;
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index e66054ed0..5ea2b01f2 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -98,12 +98,12 @@ public:
                                                       static_cast<QueryCache&>(*this),
                                                       VideoCore::QueryType::SamplesPassed}}} {}
 
-    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+    void InvalidateRegion(VAddr addr, std::size_t size) {
         std::unique_lock lock{mutex};
         FlushAndRemoveRegion(addr, size);
     }
 
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::unique_lock lock{mutex};
         FlushAndRemoveRegion(addr, size);
     }
@@ -117,14 +117,16 @@ public:
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
         std::unique_lock lock{mutex};
         auto& memory_manager = system.GPU().MemoryManager();
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
+        ASSERT_OR_EXECUTE(cpu_addr_opt, return;); // Bail out before the dereference below
+        const VAddr cpu_addr = *cpu_addr_opt;
 
-        CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
+        CachedQuery* query = TryGet(cpu_addr);
         if (!query) {
-            const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
-            ASSERT_OR_EXECUTE(cpu_addr, return;);
+            ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
+            const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 
-            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
+            query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
         }
 
         query->BindCounter(Stream(type).Current(), timestamp);
@@ -173,11 +175,11 @@ protected:
 
 private:
     /// Flushes a memory range to guest memory and removes it from the cache.
-    void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
+    void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
         const u64 addr_begin = static_cast<u64>(addr);
         const u64 addr_end = addr_begin + static_cast<u64>(size);
         const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
-            const u64 cache_begin = query.GetCacheAddr();
+            const u64 cache_begin = query.GetCpuAddr();
             const u64 cache_end = cache_begin + query.SizeInBytes();
             return cache_begin < addr_end && addr_begin < cache_end;
         };
@@ -193,7 +195,7 @@ private:
                 if (!in_range(query)) {
                     continue;
                 }
-                rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
+                rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
                 query.Flush();
             }
             contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
@@ -204,22 +206,21 @@ private:
     /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
     CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
         rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
-        const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
+        const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
         return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
                                                   host_ptr);
     }
 
     /// Tries to a get a cached query. Returns nullptr on failure.
-    CachedQuery* TryGet(CacheAddr addr) {
+    CachedQuery* TryGet(VAddr addr) {
         const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
         const auto it = cached_queries.find(page);
         if (it == std::end(cached_queries)) {
             return nullptr;
         }
         auto& contents = it->second;
-        const auto found =
-            std::find_if(std::begin(contents), std::end(contents),
-                         [addr](auto& query) { return query.GetCacheAddr() == addr; });
+        const auto found = std::find_if(std::begin(contents), std::end(contents),
+                                        [addr](auto& query) { return query.GetCpuAddr() == addr; });
         return found != std::end(contents) ? &*found : nullptr;
     }
 
@@ -323,14 +324,10 @@ public:
         timestamp = timestamp_;
     }
 
-    VAddr CpuAddr() const noexcept {
+    VAddr GetCpuAddr() const noexcept {
         return cpu_addr;
     }
 
-    CacheAddr GetCacheAddr() const noexcept {
-        return ToCacheAddr(host_ptr);
-    }
-
     u64 SizeInBytes() const noexcept {
         return SizeInBytes(timestamp.has_value());
     }
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 6de1597a2..22987751e 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -18,22 +18,14 @@
 
 class RasterizerCacheObject {
 public:
-    explicit RasterizerCacheObject(const u8* host_ptr)
-        : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
+    explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
 
     virtual ~RasterizerCacheObject();
 
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
     }
 
-    const u8* GetHostPtr() const {
-        return host_ptr;
-    }
-
-    /// Gets the address of the shader in guest memory, required for cache management
-    virtual VAddr GetCpuAddr() const = 0;
-
     /// Gets the size of the shader in guest memory, required for cache management
     virtual std::size_t GetSizeInBytes() const = 0;
 
@@ -68,8 +60,7 @@ private:
     bool is_registered{};      ///< Whether the object is currently registered with the cache
     bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
     u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
-    const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
-    CacheAddr cache_addr{};    ///< Cache address memory, unique from emulated virtual address space
+    VAddr cpu_addr{};          ///< Guest CPU address of the object, used as its key in the cache
 };
 
 template <class T>
@@ -80,7 +71,7 @@ public:
     explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
 
     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -90,7 +81,7 @@ public:
     }
 
     /// Mark the specified region as being invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size) {
+    void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
 
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -114,27 +105,20 @@ public:
 
 protected:
     /// Tries to get an object from the cache with the specified cache address
-    T TryGet(CacheAddr addr) const {
+    T TryGet(VAddr addr) const {
         const auto iter = map_cache.find(addr);
         if (iter != map_cache.end())
             return iter->second;
         return nullptr;
     }
 
-    T TryGet(const void* addr) const {
-        const auto iter = map_cache.find(ToCacheAddr(addr));
-        if (iter != map_cache.end())
-            return iter->second;
-        return nullptr;
-    }
-
     /// Register an object into the cache
     virtual void Register(const T& object) {
         std::lock_guard lock{mutex};
 
         object->SetIsRegistered(true);
         interval_cache.add({GetInterval(object), ObjectSet{object}});
-        map_cache.insert({object->GetCacheAddr(), object});
+        map_cache.insert({object->GetCpuAddr(), object});
         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
     }
 
@@ -144,7 +128,7 @@ protected:
 
         object->SetIsRegistered(false);
         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
-        const CacheAddr addr = object->GetCacheAddr();
+        const VAddr addr = object->GetCpuAddr();
         interval_cache.subtract({GetInterval(object), ObjectSet{object}});
         map_cache.erase(addr);
     }
@@ -173,7 +157,7 @@ protected:
 
 private:
     /// Returns a list of cached objects from the specified memory region, ordered by access time
-    std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
+    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
         if (size == 0) {
             return {};
         }
@@ -197,13 +181,13 @@ private:
     }
 
     using ObjectSet = std::set<T>;
-    using ObjectCache = std::unordered_map<CacheAddr, T>;
-    using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
+    using ObjectCache = std::unordered_map<VAddr, T>;
+    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
     using ObjectInterval = typename IntervalCache::interval_type;
 
     static auto GetInterval(const T& object) {
-        return ObjectInterval::right_open(object->GetCacheAddr(),
-                                          object->GetCacheAddr() + object->GetSizeInBytes());
+        return ObjectInterval::right_open(object->GetCpuAddr(),
+                                          object->GetCpuAddr() + object->GetSizeInBytes());
     }
 
     ObjectCache map_cache;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1a68e3caa..8ae5b9c4e 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -53,14 +53,14 @@ public:
     virtual void FlushAll() = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
-    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify the rasterizer to send all written commands to the host GPU.
     virtual void FlushCommands() = 0;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 5ec99a126..1d85219b6 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -46,7 +46,8 @@ public:
 
     /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer
     /// specific implementation)
-    virtual void TryPresent(int timeout_ms) = 0;
+    /// Returns true if a frame was drawn
+    virtual bool TryPresent(int timeout_ms) = 0;
 
     // Getter/setter functions:
     // ------------------------
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0375fca17..4eb37a96c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
 
-CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
-    : VideoCommon::BufferBlock{cache_addr, size} {
+CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
+    : VideoCommon::BufferBlock{cpu_addr, size} {
     gl_buffer.Create();
     glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
 }
@@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
     glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
 }
 
-Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
-    return std::make_shared<CachedBufferBlock>(cache_addr, size);
+Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<CachedBufferBlock>(cpu_addr, size);
 }
 
 void OGLBufferCache::WriteBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8c7145443..d94a11252 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf
 
 class CachedBufferBlock : public VideoCommon::BufferBlock {
 public:
-    explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
+    explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
     ~CachedBufferBlock();
 
     const GLuint* GetHandle() const {
@@ -55,7 +55,7 @@ public:
     }
 
 protected:
-    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
 
     void WriteBarrier() override;
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 1a2e2a9f7..c286502ba 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -131,6 +131,31 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
     return bindings;
 }
 
+/// Checks that the driver reports every ASTC format below as supported for 2D textures.
+bool IsASTCSupported() {
+    static constexpr std::array formats = {
+        GL_COMPRESSED_RGBA_ASTC_4x4_KHR,           GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
+        GL_COMPRESSED_RGBA_ASTC_5x5_KHR,           GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
+        GL_COMPRESSED_RGBA_ASTC_6x6_KHR,           GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
+        GL_COMPRESSED_RGBA_ASTC_8x6_KHR,           GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
+        GL_COMPRESSED_RGBA_ASTC_10x5_KHR,          GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
+        GL_COMPRESSED_RGBA_ASTC_10x8_KHR,          GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
+        GL_COMPRESSED_RGBA_ASTC_12x10_KHR,         GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
+        GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR,   GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
+        GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR,   GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
+        GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR,   GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
+        GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR,   GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
+        GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR,  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
+        GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR,  GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
+        GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
+    };
+    return std::all_of(formats.begin(), formats.end(), [](GLenum format) {
+        GLint supported = GL_FALSE; // Stays false if the query fails and writes nothing back
+        glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, &supported);
+        return supported == GL_TRUE;
+    });
+}
+
 } // Anonymous namespace
 
 Device::Device() : base_bindings{BuildBaseBindings()} {
@@ -152,6 +177,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
     has_shader_ballot = GLAD_GL_ARB_shader_ballot;
     has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
     has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
+    has_astc = IsASTCSupported();
     has_variable_aoffi = TestVariableAoffi();
     has_component_indexing_bug = is_amd;
     has_precise_bug = TestPreciseBug();
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index d73b099d0..a55050cb5 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -64,6 +64,10 @@ public:
         return has_image_load_formatted;
     }
 
+    bool HasASTC() const {
+        return has_astc;
+    }
+
     bool HasVariableAoffi() const {
         return has_variable_aoffi;
     }
@@ -97,6 +101,7 @@ private:
     bool has_shader_ballot{};
     bool has_vertex_viewport_layer{};
     bool has_image_load_formatted{};
+    bool has_astc{};
     bool has_variable_aoffi{};
     bool has_component_indexing_bug{};
     bool has_precise_bug{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 826eee7df..368f399df 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using
     texture_cache.GuardRenderTargets(true);
     View color_surface;
     if (using_color_fb) {
-        color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
+        const std::size_t index = regs.clear_buffers.RT;
+        color_surface = texture_cache.GetColorBufferSurface(index, true);
+        texture_cache.MarkColorBufferInUse(index);
     }
     View depth_surface;
     if (using_depth_fb || using_stencil_fb) {
-        depth_surface = texture_cache.GetDepthBufferSurface(false);
+        depth_surface = texture_cache.GetDepthBufferSurface(true);
+        texture_cache.MarkDepthBufferInUse();
     }
     texture_cache.GuardRenderTargets(false);
 
@@ -444,6 +447,7 @@ void RasterizerOpenGL::Clear() {
     }
 
     SyncRasterizeEnable();
+    SyncStencilTestState();
 
     if (regs.clear_flags.scissor) {
         SyncScissorTest();
@@ -652,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
 
 void RasterizerOpenGL::FlushAll() {}
 
-void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    if (!addr || !size) {
+    if (addr == 0 || size == 0) {
         return;
     }
     texture_cache.FlushRegion(addr, size);
@@ -662,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
     query_cache.FlushRegion(addr, size);
 }
 
-void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    if (!addr || !size) {
+    if (addr == 0 || size == 0) {
         return;
     }
     texture_cache.InvalidateRegion(addr, size);
@@ -673,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
     query_cache.InvalidateRegion(addr, size);
 }
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     if (Settings::values.use_accurate_gpu_emulation) {
         FlushRegion(addr, size);
     }
@@ -712,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 
-    const auto surface{
-        texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
+    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
     if (!surface) {
         return {};
     }
@@ -1052,12 +1055,8 @@ void RasterizerOpenGL::SyncStencilTestState() {
     flags[Dirty::StencilTest] = false;
 
     const auto& regs = gpu.regs;
-    if (!regs.stencil_enable) {
-        glDisable(GL_STENCIL_TEST);
-        return;
-    }
+    oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
 
-    glEnable(GL_STENCIL_TEST);
     glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
                           regs.stencil_front_func_ref, regs.stencil_front_func_mask);
     glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2d3be2437..212dad852 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -65,9 +65,9 @@ public:
     void ResetCounter(VideoCore::QueryType type) override;
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index e3d31c3eb..6d2ff20f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
 
 } // Anonymous namespace
 
-CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
+CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
                            std::shared_ptr<VideoCommon::Shader::Registry> registry,
                            ShaderEntries entries, std::shared_ptr<OGLProgram> program)
-    : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
-      cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
+    : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
+      size_in_bytes{size_in_bytes}, program{std::move(program)} {}
 
 CachedShader::~CachedShader() = default;
 
@@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
-    return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
-                                                          size_in_bytes, std::move(registry),
-                                                          MakeEntries(ir), std::move(program)));
+    return std::shared_ptr<CachedShader>(new CachedShader(
+        params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
 }
 
 Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
-    return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
-                                                          size_in_bytes, std::move(registry),
-                                                          MakeEntries(ir), std::move(program)));
+    return std::shared_ptr<CachedShader>(new CachedShader(
+        params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
 }
 
 Shader CachedShader::CreateFromCache(const ShaderParameters& params,
                                      const PrecompiledShader& precompiled_shader,
                                      std::size_t size_in_bytes) {
-    return std::shared_ptr<CachedShader>(new CachedShader(
-        params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
-        precompiled_shader.entries, precompiled_shader.program));
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
+                         precompiled_shader.entries, precompiled_shader.program));
 }
 
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -327,8 +325,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
 
     const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
                             std::size_t end) {
-        context->MakeCurrent();
-        SCOPE_EXIT({ return context->DoneCurrent(); });
+        const auto scope = context->Acquire();
 
         for (std::size_t i = begin; i < end; ++i) {
             if (stop_loading) {
@@ -450,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     const GPUVAddr address{GetShaderAddress(system, program)};
 
     // Look up shader in the cache based on address
-    const auto host_ptr{memory_manager.GetPointer(address)};
-    Shader shader{TryGet(host_ptr)};
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
+    Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
     if (shader) {
         return last_shaders[static_cast<std::size_t>(program)] = shader;
     }
 
+    const auto host_ptr{memory_manager.GetPointer(address)};
+
     // No shader found - create a new one
     ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
     ProgramCode code_b;
@@ -466,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
     const auto unique_identifier = GetUniqueIdentifier(
         GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
-    const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
-    const ShaderParameters params{system,   disk_cache, device,
-                                  cpu_addr, host_ptr,   unique_identifier};
+
+    const ShaderParameters params{system,    disk_cache, device,
+                                  *cpu_addr, host_ptr,   unique_identifier};
 
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
@@ -485,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
 Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     auto& memory_manager{system.GPU().MemoryManager()};
-    const auto host_ptr{memory_manager.GetPointer(code_addr)};
-    auto kernel = TryGet(host_ptr);
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
+
+    auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
     if (kernel) {
         return kernel;
     }
 
+    const auto host_ptr{memory_manager.GetPointer(code_addr)};
     // No kernel found, create a new one
     auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
     const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
-    const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
-    const ShaderParameters params{system,   disk_cache, device,
-                                  cpu_addr, host_ptr,   unique_identifier};
+
+    const ShaderParameters params{system,    disk_cache, device,
+                                  *cpu_addr, host_ptr,   unique_identifier};
 
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 4935019fc..c836df5bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -65,11 +65,6 @@ public:
     /// Gets the GL program handle for the shader
     GLuint GetHandle() const;
 
-    /// Returns the guest CPU address of the shader
-    VAddr GetCpuAddr() const override {
-        return cpu_addr;
-    }
-
     /// Returns the size in bytes of the shader
     std::size_t GetSizeInBytes() const override {
         return size_in_bytes;
@@ -90,13 +85,12 @@ public:
                                   std::size_t size_in_bytes);
 
 private:
-    explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
+    explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
                           ShaderEntries entries, std::shared_ptr<OGLProgram> program);
 
     std::shared_ptr<VideoCommon::Shader::Registry> registry;
     ShaderEntries entries;
-    VAddr cpu_addr = 0;
     std::size_t size_in_bytes = 0;
     std::shared_ptr<OGLProgram> program;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8aa4a7ac9..160ae4340 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -31,11 +31,11 @@ namespace {
 
 using Tegra::Engines::ShaderType;
 using Tegra::Shader::Attribute;
-using Tegra::Shader::AttributeUse;
 using Tegra::Shader::Header;
 using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
 using Tegra::Shader::IpaSampleMode;
+using Tegra::Shader::PixelImap;
 using Tegra::Shader::Register;
 using VideoCommon::Shader::BuildTransformFeedback;
 using VideoCommon::Shader::Registry;
@@ -702,20 +702,19 @@ private:
         code.AddNewLine();
     }
 
-    std::string GetInputFlags(AttributeUse attribute) {
+    const char* GetInputFlags(PixelImap attribute) {
         switch (attribute) {
-        case AttributeUse::Perspective:
-            // Default, Smooth
-            return {};
-        case AttributeUse::Constant:
-            return "flat ";
-        case AttributeUse::ScreenLinear:
-            return "noperspective ";
-        default:
-        case AttributeUse::Unused:
-            UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
-            return {};
+        case PixelImap::Perspective:
+            return "smooth";
+        case PixelImap::Constant:
+            return "flat";
+        case PixelImap::ScreenLinear:
+            return "noperspective";
+        case PixelImap::Unused:
+            break;
         }
+        UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
+        return ""; // Never nullptr: the caller assigns the result to a std::string
     }
 
     void DeclareInputAttributes() {
@@ -749,8 +748,8 @@ private:
 
         std::string suffix;
         if (stage == ShaderType::Fragment) {
-            const auto input_mode{header.ps.GetAttributeUse(location)};
-            if (skip_unused && input_mode == AttributeUse::Unused) {
+            const auto input_mode{header.ps.GetPixelImap(location)};
+            if (input_mode == PixelImap::Unused) {
                 return;
             }
             suffix = GetInputFlags(input_mode);
@@ -927,7 +926,7 @@ private:
                 const u32 address{generic_base + index * generic_stride + element * element_stride};
 
                 const bool declared = stage != ShaderType::Fragment ||
-                                      header.ps.GetAttributeUse(index) != AttributeUse::Unused;
+                                      header.ps.GetPixelImap(index) != PixelImap::Unused;
                 const std::string value =
                     declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
                 code.AddLine("case 0x{:X}U: return {};", address, value);
@@ -1142,8 +1141,7 @@ private:
                                     GetSwizzle(element)),
                         Type::Float};
             case ShaderType::Fragment:
-                return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
-                        Type::Float};
+                return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
             default:
                 UNREACHABLE();
             }
@@ -2114,6 +2112,10 @@ private:
 
     template <const std::string_view& opname, Type type>
     Expression Atomic(Operation operation) {
+        if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
+            UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
+            return {};
+        }
         return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
                             Visit(operation[1]).As(type)),
                 type};
@@ -2307,6 +2309,8 @@ private:
         ~Func() = delete;
 
         static constexpr std::string_view Add = "Add";
+        static constexpr std::string_view Min = "Min";
+        static constexpr std::string_view Max = "Max";
         static constexpr std::string_view And = "And";
         static constexpr std::string_view Or = "Or";
         static constexpr std::string_view Xor = "Xor";
@@ -2457,7 +2461,21 @@ private:
         &GLSLDecompiler::AtomicImage<Func::Xor>,
         &GLSLDecompiler::AtomicImage<Func::Exchange>,
 
+        &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
         &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+        &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
+        &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
+        &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
+        &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
+        &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
+
+        &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
+        &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
+        &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
+        &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
+        &GLSLDecompiler::Atomic<Func::And, Type::Int>,
+        &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
+        &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
 
         &GLSLDecompiler::Branch,
         &GLSLDecompiler::BranchIndirect,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index f424e3000..36590a6d0 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,7 +24,6 @@ using Tegra::Texture::SwizzleSource;
 using VideoCore::MortonSwizzleMode;
 
 using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceCompression;
 using VideoCore::Surface::SurfaceTarget;
 using VideoCore::Surface::SurfaceType;
 
@@ -37,102 +36,100 @@ namespace {
 
 struct FormatTuple {
     GLint internal_format;
-    GLenum format;
-    GLenum type;
-    bool compressed;
+    GLenum format = GL_NONE; // Stays GL_NONE for entries that list only internal_format
+    GLenum type = GL_NONE;   // Stays GL_NONE for entries that list only internal_format
 };
 
 constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                        // ABGR8U
-    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false},                                      // ABGR8S
-    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false},                         // ABGR8UI
-    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                        // B5G6R5U
-    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},                  // A2B10G10R10U
-    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false},                    // A1B5G5R5U
-    {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false},                                       // R8U
-    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false},                             // R8UI
-    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false},                                    // RGBA16F
-    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false},                                 // RGBA16U
-    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false},                                    // RGBA16S
-    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false},                       // RGBA16UI
-    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false},            // R11FG11FB10F
-    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false},                         // RGBA32UI
-    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},     // DXT1
-    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},     // DXT23
-    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},     // DXT45
-    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true},               // DXN1
-    {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true},                 // DXN2UNORM
-    {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true},                           // DXN2SNORM
-    {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},        // BC7U
-    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16
-    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true},   // BC6H_SF16
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_4X4
-    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false},                                   // BGRA8
-    {GL_RGBA32F, GL_RGBA, GL_FLOAT, false},                                         // RGBA32F
-    {GL_RG32F, GL_RG, GL_FLOAT, false},                                             // RG32F
-    {GL_R32F, GL_RED, GL_FLOAT, false},                                             // R32F
-    {GL_R16F, GL_RED, GL_HALF_FLOAT, false},                                        // R16F
-    {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false},                                     // R16U
-    {GL_R16_SNORM, GL_RED, GL_SHORT, false},                                        // R16S
-    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false},                           // R16UI
-    {GL_R16I, GL_RED_INTEGER, GL_SHORT, false},                                     // R16I
-    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false},                                     // RG16
-    {GL_RG16F, GL_RG, GL_HALF_FLOAT, false},                                        // RG16F
-    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false},                           // RG16UI
-    {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false},                                     // RG16I
-    {GL_RG16_SNORM, GL_RG, GL_SHORT, false},                                        // RG16S
-    {GL_RGB32F, GL_RGB, GL_FLOAT, false},                                           // RGB32F
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                 // RGBA8_SRGB
-    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false},                                       // RG8U
-    {GL_RG8_SNORM, GL_RG, GL_BYTE, false},                                          // RG8S
-    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false},                             // RG32UI
-    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false},                                     // RGBX16F
-    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false},                             // R32UI
-    {GL_R32I, GL_RED_INTEGER, GL_INT, false},                                       // R32I
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X8
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X5
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_5X4
-    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false},                            // BGRA8
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},             // ABGR8U
+    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                           // ABGR8S
+    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},              // ABGR8UI
+    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},             // B5G6R5U
+    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},       // A2B10G10R10U
+    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},         // A1B5G5R5U
+    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                            // R8U
+    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                  // R8UI
+    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                         // RGBA16F
+    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                      // RGBA16U
+    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                         // RGBA16S
+    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},            // RGBA16UI
+    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F
+    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},              // RGBA32UI
+    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                           // DXT1
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                           // DXT23
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                           // DXT45
+    {GL_COMPRESSED_RED_RGTC1},                                    // DXN1
+    {GL_COMPRESSED_RG_RGTC2},                                     // DXN2UNORM
+    {GL_COMPRESSED_SIGNED_RG_RGTC2},                              // DXN2SNORM
+    {GL_COMPRESSED_RGBA_BPTC_UNORM},                              // BC7U
+    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                      // BC6H_UF16
+    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                        // BC6H_SF16
+    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                            // ASTC_2D_4X4
+    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                        // BGRA8
+    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                              // RGBA32F
+    {GL_RG32F, GL_RG, GL_FLOAT},                                  // RG32F
+    {GL_R32F, GL_RED, GL_FLOAT},                                  // R32F
+    {GL_R16F, GL_RED, GL_HALF_FLOAT},                             // R16F
+    {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                          // R16U
+    {GL_R16_SNORM, GL_RED, GL_SHORT},                             // R16S
+    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                // R16UI
+    {GL_R16I, GL_RED_INTEGER, GL_SHORT},                          // R16I
+    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                          // RG16
+    {GL_RG16F, GL_RG, GL_HALF_FLOAT},                             // RG16F
+    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                // RG16UI
+    {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                          // RG16I
+    {GL_RG16_SNORM, GL_RG, GL_SHORT},                             // RG16S
+    {GL_RGB32F, GL_RGB, GL_FLOAT},                                // RGB32F
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},      // RGBA8_SRGB
+    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                            // RG8U
+    {GL_RG8_SNORM, GL_RG, GL_BYTE},                               // RG8S
+    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                  // RG32UI
+    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                          // RGBX16F
+    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                  // R32UI
+    {GL_R32I, GL_RED_INTEGER, GL_INT},                            // R32I
+    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                            // ASTC_2D_8X8
+    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                            // ASTC_2D_8X5
+    {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                            // ASTC_2D_5X4
+    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE},                 // BGRA8_SRGB
     // Compressed sRGB formats
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},    // BC7U_SRGB
-    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false},                        // R4G4B4A4U
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_4X4_SRGB
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_8X8_SRGB
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_8X5_SRGB
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_5X4_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_5X5
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_5X5_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_10X8
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_10X8_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_6X6
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_6X6_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_10X10
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_10X10_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_12X12
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_12X12_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_8X6
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_8X6_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_6X5
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_6X5_SRGB
-    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},           // DXT1_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},           // DXT23_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},           // DXT45_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},              // BC7U_SRGB
+    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},          // ASTC_2D_4X4_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},          // ASTC_2D_8X8_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},          // ASTC_2D_8X5_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},          // ASTC_2D_5X4_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                  // ASTC_2D_5X5
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},          // ASTC_2D_5X5_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                 // ASTC_2D_10X8
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},         // ASTC_2D_10X8_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                  // ASTC_2D_6X6
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},          // ASTC_2D_6X6_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                // ASTC_2D_10X10
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},        // ASTC_2D_10X10_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                // ASTC_2D_12X12
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},        // ASTC_2D_12X12_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                  // ASTC_2D_8X6
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},          // ASTC_2D_8X6_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                  // ASTC_2D_6X5
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},          // ASTC_2D_6X5_SRGB
+    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},  // E5B9G9R9F
 
     // Depth formats
-    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false},         // Z32F
-    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16
+    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},         // Z32F
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16
 
     // DepthStencil formats
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false},               // Z24S8
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false},               // S8Z24
-    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},               // Z24S8
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},               // S8Z24
+    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8
 }};
 
 const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
     ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
-    const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
-    return format;
+    return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; // indexed by PixelFormat
 }
 
 GLenum GetTextureTarget(const SurfaceTarget& target) {
@@ -242,13 +239,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
 
 } // Anonymous namespace
 
-CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
-    : VideoCommon::SurfaceBase<View>(gpu_addr, params) {
-    const auto& tuple{GetFormatTuple(params.pixel_format)};
-    internal_format = tuple.internal_format;
-    format = tuple.format;
-    type = tuple.type;
-    is_compressed = tuple.compressed;
+CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
+                             bool is_astc_supported)
+    : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) {
+    if (is_converted) {
+        internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+        format = GL_RGBA;
+        type = GL_UNSIGNED_BYTE;
+    } else {
+        const auto& tuple{GetFormatTuple(params.pixel_format)};
+        internal_format = tuple.internal_format;
+        format = tuple.format;
+        type = tuple.type;
+        is_compressed = params.IsCompressed();
+    }
     target = GetTextureTarget(params.target);
     texture = CreateTexture(params, target, internal_format, texture_buffer);
     DecorateSurfaceName();
@@ -264,7 +268,7 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
 
     if (params.IsBuffer()) {
         glGetNamedBufferSubData(texture_buffer.handle, 0,
-                                static_cast<GLsizeiptr>(params.GetHostSizeInBytes()),
+                                static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
                                 staging_buffer.data());
         return;
     }
@@ -272,9 +276,10 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
     SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
 
     for (u32 level = 0; level < params.emulated_levels; ++level) {
-        glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
+        glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
         glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
-        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
+        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
+
         u8* const mip_data = staging_buffer.data() + mip_offset;
         const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
         if (is_compressed) {
@@ -294,14 +299,10 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
 }
 
 void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
-    glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
+    glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
 
-    auto compression_type = params.GetCompressionType();
-
-    const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
-                                       ? params.GetConvertedMipmapOffset(level)
-                                       : params.GetHostMipmapLevelOffset(level);
+    const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
     const u8* buffer{staging_buffer.data() + mip_offset};
     if (is_compressed) {
         const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
@@ -482,7 +483,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
 TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
                                        VideoCore::RasterizerInterface& rasterizer,
                                        const Device& device, StateTracker& state_tracker)
-    : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} {
+    : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
     src_framebuffer.Create();
     dst_framebuffer.Create();
 }
@@ -490,7 +491,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
 TextureCacheOpenGL::~TextureCacheOpenGL() = default;
 
 Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
-    return std::make_shared<CachedSurface>(gpu_addr, params);
+    return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
 }
 
 void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -596,7 +597,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
 
     glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
 
-    if (source_format.compressed) {
+    if (src_surface->IsCompressed()) {
         glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
                                     nullptr);
     } else {
@@ -610,7 +611,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
     const GLsizei width = static_cast<GLsizei>(dst_params.width);
     const GLsizei height = static_cast<GLsizei>(dst_params.height);
     const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
-    if (dest_format.compressed) {
+    if (dst_surface->IsCompressed()) {
         LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
         UNREACHABLE();
     } else {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 6658c6ffd..02d9981a1 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -37,7 +37,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
     friend CachedSurfaceView;
 
 public:
-    explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
+    explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported);
     ~CachedSurface();
 
     void UploadTexture(const std::vector<u8>& staging_buffer) override;
@@ -51,6 +51,10 @@ public:
         return texture.handle;
     }
 
+    bool IsCompressed() const {
+        return is_compressed; // set by the constructor for non-converted surfaces
+    }
+
 protected:
     void DecorateSurfaceName() override;
 
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index fca5e3ec0..f1a28cc21 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -30,8 +30,6 @@ namespace OpenGL {
 
 namespace {
 
-// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
-// to wait on available presentation frames.
 constexpr std::size_t SWAP_CHAIN_SIZE = 3;
 
 struct Frame {
@@ -214,7 +212,7 @@ public:
     std::deque<Frame*> present_queue;
     Frame* previous_frame{};
 
-    FrameMailbox() : has_debug_tool{HasDebugTool()} {
+    FrameMailbox() {
         for (auto& frame : swap_chain) {
             free_queue.push(&frame);
         }
@@ -285,13 +283,9 @@ public:
         std::unique_lock lock{swap_chain_lock};
         present_queue.push_front(frame);
         present_cv.notify_one();
-
-        DebugNotifyNextFrame();
     }
 
     Frame* TryGetPresentFrame(int timeout_ms) {
-        DebugWaitForNextFrame();
-
         std::unique_lock lock{swap_chain_lock};
         // wait for new entries in the present_queue
         present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
@@ -317,38 +311,12 @@ public:
         previous_frame = frame;
         return frame;
     }
-
-private:
-    std::mutex debug_synch_mutex;
-    std::condition_variable debug_synch_condition;
-    std::atomic_int frame_for_debug{};
-    const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
-
-    /// Signal that a new frame is available (called from GPU thread)
-    void DebugNotifyNextFrame() {
-        if (!has_debug_tool) {
-            return;
-        }
-        frame_for_debug++;
-        std::lock_guard lock{debug_synch_mutex};
-        debug_synch_condition.notify_one();
-    }
-
-    /// Wait for a new frame to be available (called from presentation thread)
-    void DebugWaitForNextFrame() {
-        if (!has_debug_tool) {
-            return;
-        }
-        const int last_frame = frame_for_debug;
-        std::unique_lock lock{debug_synch_mutex};
-        debug_synch_condition.wait(lock,
-                                   [this, last_frame] { return frame_for_debug > last_frame; });
-    }
 };
 
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
+RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
+                               Core::Frontend::GraphicsContext& context)
     : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
-      frame_mailbox{std::make_unique<FrameMailbox>()} {}
+      context{context}, frame_mailbox{}, has_debug_tool{HasDebugTool()} {} // declaration order
 
 RendererOpenGL::~RendererOpenGL() = default;
 
@@ -356,8 +324,6 @@ MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 12
 MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
 
 void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
-    render_window.PollEvents();
-
     if (!framebuffer) {
         return;
     }
@@ -413,6 +379,13 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
         m_current_frame++;
         rasterizer->TickFrame();
     }
+
+    render_window.PollEvents();
+    if (has_debug_tool) {
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+        Present(0);
+        context.SwapBuffers();
+    }
 }
 
 void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
@@ -480,6 +453,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
 }
 
 void RendererOpenGL::InitOpenGLObjects() {
+    frame_mailbox = std::make_unique<FrameMailbox>();
+
     glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
                  0.0f);
 
@@ -692,12 +667,21 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 }
 
-void RendererOpenGL::TryPresent(int timeout_ms) {
+bool RendererOpenGL::TryPresent(int timeout_ms) {
+    if (!has_debug_tool) {
+        return Present(timeout_ms);
+    }
+    LOG_DEBUG(Render_OpenGL,
+              "Skipping presentation because we are presenting on the main context");
+    return false; // SwapBuffers() presents on the main context in this mode
+}
+
+bool RendererOpenGL::Present(int timeout_ms) {
     const auto& layout = render_window.GetFramebufferLayout();
     auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
     if (!frame) {
         LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
-        return;
+        return false;
     }
 
     // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
@@ -725,6 +709,7 @@ void RendererOpenGL::TryPresent(int timeout_ms) {
     glFlush();
 
     glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
+    return true;
 }
 
 void RendererOpenGL::RenderScreenshot() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 33073ce5b..50b647661 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -55,13 +55,14 @@ class FrameMailbox;
 
 class RendererOpenGL final : public VideoCore::RendererBase {
 public:
-    explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
+    explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
+                            Core::Frontend::GraphicsContext& context);
     ~RendererOpenGL() override;
 
     bool Init() override;
     void ShutDown() override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void TryPresent(int timeout_ms) override;
+    bool TryPresent(int timeout_ms) override;
 
 private:
     /// Initializes the OpenGL state and creates persistent objects.
@@ -89,8 +90,11 @@ private:
 
     void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
 
+    bool Present(int timeout_ms);
+
     Core::Frontend::EmuWindow& emu_window;
     Core::System& system;
+    Core::Frontend::GraphicsContext& context;
 
     StateTracker state_tracker{system};
 
@@ -115,6 +119,8 @@ private:
 
     /// Frame presentation mailbox
     std::unique_ptr<FrameMailbox> frame_mailbox;
+
+    bool has_debug_tool = false; // From HasDebugTool(); when set, SwapBuffers() presents directly
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h
index 323bf6b39..89a035ca4 100644
--- a/src/video_core/renderer_vulkan/declarations.h
+++ b/src/video_core/renderer_vulkan/declarations.h
@@ -39,6 +39,7 @@ using UniqueFence = UniqueHandle<vk::Fence>;
 using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
 using UniqueImage = UniqueHandle<vk::Image>;
 using UniqueImageView = UniqueHandle<vk::ImageView>;
+using UniqueInstance = UniqueHandle<vk::Instance>;
 using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
 using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
 using UniquePipeline = UniqueHandle<vk::Pipeline>;
@@ -50,6 +51,7 @@ using UniqueSampler = UniqueHandle<vk::Sampler>;
 using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
 using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
 using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
+using UniqueSurfaceKHR = UniqueHandle<vk::SurfaceKHR>;
 using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
 using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
 using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 42bb01418..9cdb4b627 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -2,13 +2,18 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+#include <array>
+#include <cstring>
 #include <memory>
 #include <optional>
+#include <string>
 #include <vector>
 
 #include <fmt/format.h>
 
 #include "common/assert.h"
+#include "common/dynamic_library.h"
 #include "common/logging/log.h"
 #include "common/telemetry.h"
 #include "core/core.h"
@@ -30,15 +35,30 @@
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
 
+// Include these late to avoid changing Vulkan-Hpp's dynamic dispatcher size
+#ifdef _WIN32
+#include <windows.h>
+// Keep this order: vulkan_win32.h relies on types declared by <windows.h>
+#include <vulkan/vulkan_win32.h>
+#endif
+
+#ifdef __linux__
+#include <X11/Xlib.h>
+#include <vulkan/vulkan_wayland.h>
+#include <vulkan/vulkan_xlib.h>
+#endif
+
 namespace Vulkan {
 
 namespace {
 
+using Core::Frontend::WindowSystemType;
+
 VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
                        VkDebugUtilsMessageTypeFlagsEXT type,
                        const VkDebugUtilsMessengerCallbackDataEXT* data,
                        [[maybe_unused]] void* user_data) {
-    const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_};
+    const auto severity{static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(severity_)};
     const char* message{data->pMessage};
 
     if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
@@ -53,6 +73,110 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
     return VK_FALSE;
 }
 
+Common::DynamicLibrary OpenVulkanLibrary() {
+    Common::DynamicLibrary library; // Returned as-is even when every Open() below fails
+#ifdef __APPLE__
+    // An explicit loader path in LIBVULKAN_PATH takes priority; the result is read-only.
+    const char* libvulkan_env = getenv("LIBVULKAN_PATH");
+    if (!libvulkan_env || !library.Open(libvulkan_env)) {
+        // Fall back to the bundled libvulkan.dylib. NOTE(review): confirm File:: resolves.
+        std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+        library.Open(filename.c_str());
+    }
+#else
+    std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
+    if (!library.Open(filename.c_str())) {
+        // Android devices may not have libvulkan.so.1, only libvulkan.so.
+        filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
+        library.Open(filename.c_str());
+    }
+#endif
+    return library;
+}
+
+UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoaderDynamic& dld,
+                              WindowSystemType window_type = WindowSystemType::Headless,
+                              bool enable_layers = false) {
+    if (!library.IsOpen()) {
+        LOG_ERROR(Render_Vulkan, "Vulkan library not available");
+        return UniqueInstance{};
+    }
+    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
+    if (!library.GetSymbol("vkGetInstanceProcAddr", &vkGetInstanceProcAddr)) {
+        LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
+        return UniqueInstance{};
+    }
+    dld.init(vkGetInstanceProcAddr); // Bootstrap dld so the vk:: calls below can dispatch
+
+    std::vector<const char*> extensions;
+    extensions.reserve(4);
+    switch (window_type) {
+    case WindowSystemType::Headless:
+        break;
+#ifdef _WIN32
+    case WindowSystemType::Windows:
+        extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
+        break;
+#endif
+#ifdef __linux__
+    case WindowSystemType::X11:
+        extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
+        break;
+    case WindowSystemType::Wayland:
+        extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
+        break;
+#endif
+    default:
+        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
+        break;
+    }
+    if (window_type != WindowSystemType::Headless) {
+        extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
+    }
+    if (enable_layers) {
+        extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+    }
+
+    u32 num_properties = 0; // First call only queries the count, second one fills the array
+    if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, nullptr, dld) !=
+        vk::Result::eSuccess) {
+        LOG_ERROR(Render_Vulkan, "Failed to query number of extension properties");
+        return UniqueInstance{};
+    }
+    std::vector<vk::ExtensionProperties> properties(num_properties);
+    if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, properties.data(),
+                                                 dld) != vk::Result::eSuccess) {
+        LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
+        return UniqueInstance{};
+    }
+
+    for (const char* extension : extensions) { // Every requested extension must be reported
+        const auto it =
+            std::find_if(properties.begin(), properties.end(), [extension](const auto& prop) {
+                return !std::strcmp(extension, prop.extensionName);
+            });
+        if (it == properties.end()) {
+            LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
+            return UniqueInstance{};
+        }
+    }
+
+    const vk::ApplicationInfo application_info("yuzu Emulator", VK_MAKE_VERSION(0, 1, 0),
+                                               "yuzu Emulator", VK_MAKE_VERSION(0, 1, 0),
+                                               VK_API_VERSION_1_1);
+    const std::array layers = {"VK_LAYER_LUNARG_standard_validation"};
+    const vk::InstanceCreateInfo instance_ci(
+        {}, &application_info, enable_layers ? static_cast<u32>(layers.size()) : 0, layers.data(),
+        static_cast<u32>(extensions.size()), extensions.data());
+    vk::Instance unsafe_instance;
+    if (vk::createInstance(&instance_ci, nullptr, &unsafe_instance, dld) != vk::Result::eSuccess) {
+        LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
+        return UniqueInstance{};
+    }
+    dld.init(unsafe_instance); // Re-initialize the dispatcher now that an instance exists
+    return UniqueInstance(unsafe_instance, {nullptr, dld});
+}
+
 std::string GetReadableVersion(u32 version) {
     return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
                        VK_VERSION_PATCH(version));
@@ -141,32 +265,18 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     render_window.PollEvents();
 }
 
-void RendererVulkan::TryPresent(int /*timeout_ms*/) {
+bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
     // TODO (bunnei): ImplementMe
+    return true;
 }
 
 bool RendererVulkan::Init() {
-    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
-    render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
-    const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr);
-
-    std::optional<vk::DebugUtilsMessengerEXT> callback;
-    if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
-        callback = CreateDebugCallback(dldi);
-        if (!callback) {
-            return false;
-        }
-    }
-
-    if (!PickDevices(dldi)) {
-        if (callback) {
-            instance.destroy(*callback, nullptr, dldi);
-        }
+    library = OpenVulkanLibrary();
+    instance = CreateInstance(library, dld, render_window.GetWindowInfo().type,
+                              Settings::values.renderer_debug);
+    if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
         return false;
     }
-    debug_callback = UniqueDebugUtilsMessengerEXT(
-        *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
-                       instance, nullptr, device->GetDispatchLoader()));
 
     Report();
 
@@ -175,7 +285,7 @@ bool RendererVulkan::Init() {
     resource_manager = std::make_unique<VKResourceManager>(*device);
 
     const auto& framebuffer = render_window.GetFramebufferLayout();
-    swapchain = std::make_unique<VKSwapchain>(surface, *device);
+    swapchain = std::make_unique<VKSwapchain>(*surface, *device);
     swapchain->Create(framebuffer.width, framebuffer.height, false);
 
     state_tracker = std::make_unique<StateTracker>(system);
@@ -212,8 +322,10 @@ void RendererVulkan::ShutDown() {
     device.reset();
 }
 
-std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
-    const vk::DispatchLoaderDynamic& dldi) {
+bool RendererVulkan::CreateDebugCallback() {
+    if (!Settings::values.renderer_debug) {
+        return true;
+    }
     const vk::DebugUtilsMessengerCreateInfoEXT callback_ci(
         {},
         vk::DebugUtilsMessageSeverityFlagBitsEXT::eError |
@@ -224,32 +336,88 @@ std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
             vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
             vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
         &DebugCallback, nullptr);
-    vk::DebugUtilsMessengerEXT callback;
-    if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) !=
+    vk::DebugUtilsMessengerEXT unsafe_callback;
+    if (instance->createDebugUtilsMessengerEXT(&callback_ci, nullptr, &unsafe_callback, dld) !=
         vk::Result::eSuccess) {
         LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
-        return {};
+        return false;
+    }
+    debug_callback = UniqueDebugUtilsMessengerEXT(unsafe_callback, {*instance, nullptr, dld});
+    return true;
+}
+
+bool RendererVulkan::CreateSurface() {
+    [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
+    VkSurfaceKHR unsafe_surface = VK_NULL_HANDLE; // Stays null if no backend below matches
+
+#ifdef _WIN32
+    if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
+        const HWND hWnd = static_cast<HWND>(window_info.render_surface);
+        const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
+                                                   nullptr, 0, nullptr, hWnd};
+        const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
+        if (!vkCreateWin32SurfaceKHR || vkCreateWin32SurfaceKHR(instance.get(), &win32_ci, nullptr,
+                                                                &unsafe_surface) != VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
+            return false;
+        }
+    }
+#endif
+#ifdef __linux__
+    if (window_info.type == Core::Frontend::WindowSystemType::X11) {
+        const VkXlibSurfaceCreateInfoKHR xlib_ci{
+            VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+            static_cast<Display*>(window_info.display_connection),
+            reinterpret_cast<Window>(window_info.render_surface)};
+        const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
+        if (!vkCreateXlibSurfaceKHR || vkCreateXlibSurfaceKHR(instance.get(), &xlib_ci, nullptr,
+                                                              &unsafe_surface) != VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
+            return false;
+        }
+    }
+    if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
+        const VkWaylandSurfaceCreateInfoKHR wayland_ci{
+            VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+            static_cast<wl_display*>(window_info.display_connection),
+            static_cast<wl_surface*>(window_info.render_surface)};
+        const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
+        if (!vkCreateWaylandSurfaceKHR ||
+            vkCreateWaylandSurfaceKHR(instance.get(), &wayland_ci, nullptr, &unsafe_surface) !=
+                VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
+            return false;
+        }
+    }
+#endif
+    if (!unsafe_surface) {
+        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
+        return false;
     }
-    return callback;
+
+    surface = UniqueSurfaceKHR(unsafe_surface, {*instance, nullptr, dld});
+    return true;
 }
 
-bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) {
-    const auto devices = instance.enumeratePhysicalDevices(dldi);
+bool RendererVulkan::PickDevices() {
+    const auto devices = instance->enumeratePhysicalDevices(dld);
 
-    // TODO(Rodrigo): Choose device from config file
     const s32 device_index = Settings::values.vulkan_device;
     if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
         LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
         return false;
     }
-    const vk::PhysicalDevice physical_device = devices[device_index];
+    const vk::PhysicalDevice physical_device = devices[static_cast<std::size_t>(device_index)];
 
-    if (!VKDevice::IsSuitable(dldi, physical_device, surface)) {
+    if (!VKDevice::IsSuitable(physical_device, *surface, dld)) {
         return false;
     }
 
-    device = std::make_unique<VKDevice>(dldi, physical_device, surface);
-    return device->Create(dldi, instance);
+    device = std::make_unique<VKDevice>(dld, physical_device, *surface);
+    return device->Create(*instance);
 }
 
 void RendererVulkan::Report() const {
@@ -275,4 +443,33 @@ void RendererVulkan::Report() const {
     telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
 }
 
+std::vector<std::string> RendererVulkan::EnumerateDevices() {
+    // Avoid putting DispatchLoaderDynamic on the stack, it's too large
+    auto dld_memory = std::make_unique<vk::DispatchLoaderDynamic>();
+    auto& dld = *dld_memory;
+
+    Common::DynamicLibrary library = OpenVulkanLibrary();
+    UniqueInstance instance = CreateInstance(library, dld);
+    if (!instance) {
+        return {};
+    }
+
+    u32 num_devices = 0; // Count query first, then fetch the handles
+    if (instance->enumeratePhysicalDevices(&num_devices, nullptr, dld) != vk::Result::eSuccess) {
+        return {};
+    }
+    std::vector<vk::PhysicalDevice> devices(num_devices);
+    if (instance->enumeratePhysicalDevices(&num_devices, devices.data(), dld) !=
+        vk::Result::eSuccess) {
+        return {};
+    }
+
+    std::vector<std::string> names;
+    names.reserve(num_devices);
+    for (u32 index = 0; index < num_devices; ++index) { // only entries the driver filled in
+        names.push_back(devices[index].getProperties(dld).deviceName);
+    }
+    return names;
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 3da08d2e4..42e253de5 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -6,8 +6,11 @@
 
 #include <memory>
 #include <optional>
+#include <string>
 #include <vector>
 
+#include "common/dynamic_library.h"
+
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_vulkan/declarations.h"
 
@@ -42,20 +45,26 @@ public:
     bool Init() override;
     void ShutDown() override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void TryPresent(int timeout_ms) override;
+    bool TryPresent(int timeout_ms) override;
+
+    static std::vector<std::string> EnumerateDevices();
 
 private:
-    std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
-        const vk::DispatchLoaderDynamic& dldi);
+    bool CreateDebugCallback();
 
-    bool PickDevices(const vk::DispatchLoaderDynamic& dldi);
+    bool CreateSurface();
+
+    bool PickDevices();
 
     void Report() const;
 
     Core::System& system;
 
-    vk::Instance instance;
-    vk::SurfaceKHR surface;
+    Common::DynamicLibrary library;
+    vk::DispatchLoaderDynamic dld;
+
+    UniqueInstance instance;
+    UniqueSurfaceKHR surface;
 
     VKScreenInfo screen_info;
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1ba544943..326d74f29 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
 } // Anonymous namespace
 
 CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
-                                     CacheAddr cache_addr, std::size_t size)
-    : VideoCommon::BufferBlock{cache_addr, size} {
+                                     VAddr cpu_addr, std::size_t size)
+    : VideoCommon::BufferBlock{cpu_addr, size} {
     const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
                                          BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
                                              vk::BufferUsageFlagBits::eTransferDst,
@@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
 
 VKBufferCache::~VKBufferCache() = default;
 
-Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
-    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
+Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
 }
 
 const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3f38eed0c..508214618 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -30,7 +30,7 @@ class VKScheduler;
 class CachedBufferBlock final : public VideoCommon::BufferBlock {
 public:
     explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
-                               CacheAddr cache_addr, std::size_t size);
+                               VAddr cpu_addr, std::size_t size);
     ~CachedBufferBlock();
 
     const vk::Buffer* GetHandle() const {
@@ -55,7 +55,7 @@ public:
 protected:
     void WriteBarrier() override {}
 
-    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
 
     const vk::Buffer* ToHandle(const Buffer& buffer) override;
 
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 28d2fbc4f..6f4ae9132 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -10,6 +10,7 @@
 #include <string_view>
 #include <thread>
 #include <vector>
+
 #include "common/assert.h"
 #include "core/settings.h"
 #include "video_core/renderer_vulkan/declarations.h"
@@ -35,20 +36,20 @@ void SetNext(void**& next, T& data) {
 }
 
 template <typename T>
-T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
+T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) {
     vk::PhysicalDeviceFeatures2 features;
     T extension_features;
     features.pNext = &extension_features;
-    physical.getFeatures2(&features, dldi);
+    physical.getFeatures2(&features, dld);
     return extension_features;
 }
 
 template <typename T>
-T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
+T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) {
     vk::PhysicalDeviceProperties2 properties;
     T extension_properties;
     properties.pNext = &extension_properties;
-    physical.getProperties2(&properties, dldi);
+    physical.getProperties2(&properties, dld);
     return extension_properties;
 }
 
@@ -78,19 +79,19 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format
 
 } // Anonymous namespace
 
-VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical,
                    vk::SurfaceKHR surface)
-    : physical{physical}, properties{physical.getProperties(dldi)},
-      format_properties{GetFormatProperties(dldi, physical)} {
-    SetupFamilies(dldi, surface);
-    SetupFeatures(dldi);
+    : dld{dld}, physical{physical}, properties{physical.getProperties(dld)},
+      format_properties{GetFormatProperties(dld, physical)} {
+    SetupFamilies(surface);
+    SetupFeatures();
 }
 
 VKDevice::~VKDevice() = default;
 
-bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
+bool VKDevice::Create(vk::Instance instance) {
     const auto queue_cis = GetDeviceQueueCreateInfos();
-    const std::vector extensions = LoadExtensions(dldi);
+    const std::vector extensions = LoadExtensions();
 
     vk::PhysicalDeviceFeatures2 features2;
     void** next = &features2.pNext;
@@ -165,15 +166,13 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
                                    nullptr);
     device_ci.pNext = &features2;
 
-    vk::Device dummy_logical;
-    if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
+    vk::Device unsafe_logical;
+    if (physical.createDevice(&device_ci, nullptr, &unsafe_logical, dld) != vk::Result::eSuccess) {
         LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
         return false;
     }
-
-    dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
-    logical = UniqueDevice(
-        dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
+    dld.init(instance, dld.vkGetInstanceProcAddr, unsafe_logical);
+    logical = UniqueDevice(unsafe_logical, {nullptr, dld});
 
     CollectTelemetryParameters();
 
@@ -235,20 +234,23 @@ void VKDevice::ReportLoss() const {
     // *(VKGraphicsPipeline*)data[0]
 }
 
-bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
-                                      const vk::DispatchLoaderDynamic& dldi) const {
+bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const {
     // Disable for now to avoid converting ASTC twice.
-    return false;
     static constexpr std::array astc_formats = {
-        vk::Format::eAstc4x4SrgbBlock,    vk::Format::eAstc8x8SrgbBlock,
-        vk::Format::eAstc8x5SrgbBlock,    vk::Format::eAstc5x4SrgbBlock,
+        vk::Format::eAstc4x4UnormBlock,   vk::Format::eAstc4x4SrgbBlock,
+        vk::Format::eAstc5x4UnormBlock,   vk::Format::eAstc5x4SrgbBlock,
         vk::Format::eAstc5x5UnormBlock,   vk::Format::eAstc5x5SrgbBlock,
-        vk::Format::eAstc10x8UnormBlock,  vk::Format::eAstc10x8SrgbBlock,
+        vk::Format::eAstc6x5UnormBlock,   vk::Format::eAstc6x5SrgbBlock,
         vk::Format::eAstc6x6UnormBlock,   vk::Format::eAstc6x6SrgbBlock,
-        vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
-        vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
+        vk::Format::eAstc8x5UnormBlock,   vk::Format::eAstc8x5SrgbBlock,
         vk::Format::eAstc8x6UnormBlock,   vk::Format::eAstc8x6SrgbBlock,
-        vk::Format::eAstc6x5UnormBlock,   vk::Format::eAstc6x5SrgbBlock};
+        vk::Format::eAstc8x8UnormBlock,   vk::Format::eAstc8x8SrgbBlock,
+        vk::Format::eAstc10x5UnormBlock,  vk::Format::eAstc10x5SrgbBlock,
+        vk::Format::eAstc10x6UnormBlock,  vk::Format::eAstc10x6SrgbBlock,
+        vk::Format::eAstc10x8UnormBlock,  vk::Format::eAstc10x8SrgbBlock,
+        vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
+        vk::Format::eAstc12x10UnormBlock, vk::Format::eAstc12x10SrgbBlock,
+        vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock};
     if (!features.textureCompressionASTC_LDR) {
         return false;
     }
@@ -257,7 +259,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
         vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
         vk::FormatFeatureFlagBits::eTransferDst};
     for (const auto format : astc_formats) {
-        const auto format_properties{physical.getFormatProperties(format, dldi)};
+        const auto format_properties{physical.getFormatProperties(format, dld)};
         if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
             return false;
         }
@@ -276,11 +278,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
     return (supported_usage & wanted_usage) == wanted_usage;
 }
 
-bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
-                          vk::SurfaceKHR surface) {
-    bool is_suitable = true;
-
-    constexpr std::array required_extensions = {
+bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface,
+                          const vk::DispatchLoaderDynamic& dld) {
+    static constexpr std::array required_extensions = {
         VK_KHR_SWAPCHAIN_EXTENSION_NAME,
         VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
         VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
@@ -290,9 +290,10 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
         VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
         VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
     };
+    bool is_suitable = true;
     std::bitset<required_extensions.size()> available_extensions{};
 
-    for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
+    for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dld)) {
         for (std::size_t i = 0; i < required_extensions.size(); ++i) {
             if (available_extensions[i]) {
                 continue;
@@ -312,7 +313,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
     }
 
     bool has_graphics{}, has_present{};
-    const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
+    const auto queue_family_properties = physical.getQueueFamilyProperties(dld);
     for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
         const auto& family = queue_family_properties[i];
         if (family.queueCount == 0) {
@@ -320,7 +321,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
         }
         has_graphics |=
             (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
-        has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
+        has_present |= physical.getSurfaceSupportKHR(i, surface, dld) != 0;
     }
     if (!has_graphics || !has_present) {
         LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
@@ -328,7 +329,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
     }
 
     // TODO(Rodrigo): Check if the device matches all requeriments.
-    const auto properties{physical.getProperties(dldi)};
+    const auto properties{physical.getProperties(dld)};
     const auto& limits{properties.limits};
 
     constexpr u32 required_ubo_size = 65536;
@@ -345,7 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
         is_suitable = false;
     }
 
-    const auto features{physical.getFeatures(dldi)};
+    const auto features{physical.getFeatures(dld)};
     const std::array feature_report = {
         std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
         std::make_pair(features.independentBlend, "independentBlend"),
@@ -377,7 +378,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
     return is_suitable;
 }
 
-std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
+std::vector<const char*> VKDevice::LoadExtensions() {
     std::vector<const char*> extensions;
     const auto Test = [&](const vk::ExtensionProperties& extension,
                           std::optional<std::reference_wrapper<bool>> status, const char* name,
@@ -408,7 +409,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
     bool has_khr_shader_float16_int8{};
     bool has_ext_subgroup_size_control{};
     bool has_ext_transform_feedback{};
-    for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
+    for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dld)) {
         Test(extension, khr_uniform_buffer_standard_layout,
              VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
         Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
@@ -430,15 +431,15 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
 
     if (has_khr_shader_float16_int8) {
         is_float16_supported =
-            GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
+            GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dld).shaderFloat16;
         extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
     }
 
     if (has_ext_subgroup_size_control) {
         const auto features =
-            GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
+            GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dld);
         const auto properties =
-            GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
+            GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dld);
 
         is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
 
@@ -453,9 +454,9 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
 
     if (has_ext_transform_feedback) {
         const auto features =
-            GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
+            GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dld);
         const auto properties =
-            GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
+            GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dld);
 
         if (features.transformFeedback && features.geometryStreams &&
             properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
@@ -468,10 +469,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
     return extensions;
 }
 
-void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
+void VKDevice::SetupFamilies(vk::SurfaceKHR surface) {
     std::optional<u32> graphics_family_, present_family_;
 
-    const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
+    const auto queue_family_properties = physical.getQueueFamilyProperties(dld);
     for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
         if (graphics_family_ && present_family_)
             break;
@@ -480,10 +481,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
         if (queue_family.queueCount == 0)
             continue;
 
-        if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
+        if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) {
             graphics_family_ = i;
-        if (physical.getSurfaceSupportKHR(i, surface, dldi))
+        }
+        if (physical.getSurfaceSupportKHR(i, surface, dld)) {
             present_family_ = i;
+        }
     }
     ASSERT(graphics_family_ && present_family_);
 
@@ -491,10 +494,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
     present_family = *present_family_;
 }
 
-void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
-    const auto supported_features{physical.getFeatures(dldi)};
+void VKDevice::SetupFeatures() {
+    const auto supported_features{physical.getFeatures(dld)};
     is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
-    is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
+    is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
 }
 
 void VKDevice::CollectTelemetryParameters() {
@@ -522,7 +525,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
 }
 
 std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
-    const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
+    const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical) {
     static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
                                         vk::Format::eA8B8G8R8UintPack32,
                                         vk::Format::eA8B8G8R8SnormPack32,
@@ -572,28 +575,38 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
                                         vk::Format::eBc2SrgbBlock,
                                         vk::Format::eBc3SrgbBlock,
                                         vk::Format::eBc7SrgbBlock,
+                                        vk::Format::eAstc4x4UnormBlock,
                                         vk::Format::eAstc4x4SrgbBlock,
-                                        vk::Format::eAstc8x8SrgbBlock,
-                                        vk::Format::eAstc8x5SrgbBlock,
+                                        vk::Format::eAstc5x4UnormBlock,
                                         vk::Format::eAstc5x4SrgbBlock,
                                         vk::Format::eAstc5x5UnormBlock,
                                         vk::Format::eAstc5x5SrgbBlock,
-                                        vk::Format::eAstc10x8UnormBlock,
-                                        vk::Format::eAstc10x8SrgbBlock,
+                                        vk::Format::eAstc6x5UnormBlock,
+                                        vk::Format::eAstc6x5SrgbBlock,
                                         vk::Format::eAstc6x6UnormBlock,
                                         vk::Format::eAstc6x6SrgbBlock,
+                                        vk::Format::eAstc8x5UnormBlock,
+                                        vk::Format::eAstc8x5SrgbBlock,
+                                        vk::Format::eAstc8x6UnormBlock,
+                                        vk::Format::eAstc8x6SrgbBlock,
+                                        vk::Format::eAstc8x8UnormBlock,
+                                        vk::Format::eAstc8x8SrgbBlock,
+                                        vk::Format::eAstc10x5UnormBlock,
+                                        vk::Format::eAstc10x5SrgbBlock,
+                                        vk::Format::eAstc10x6UnormBlock,
+                                        vk::Format::eAstc10x6SrgbBlock,
+                                        vk::Format::eAstc10x8UnormBlock,
+                                        vk::Format::eAstc10x8SrgbBlock,
                                         vk::Format::eAstc10x10UnormBlock,
                                         vk::Format::eAstc10x10SrgbBlock,
+                                        vk::Format::eAstc12x10UnormBlock,
+                                        vk::Format::eAstc12x10SrgbBlock,
                                         vk::Format::eAstc12x12UnormBlock,
                                         vk::Format::eAstc12x12SrgbBlock,
-                                        vk::Format::eAstc8x6UnormBlock,
-                                        vk::Format::eAstc8x6SrgbBlock,
-                                        vk::Format::eAstc6x5UnormBlock,
-                                        vk::Format::eAstc6x5SrgbBlock,
                                         vk::Format::eE5B9G9R9UfloatPack32};
     std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
     for (const auto format : formats) {
-        format_properties.emplace(format, physical.getFormatProperties(format, dldi));
+        format_properties.emplace(format, physical.getFormatProperties(format, dld));
     }
     return format_properties;
 }
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 6e656517f..d9d809852 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -22,12 +22,12 @@ const u32 GuestWarpSize = 32;
 /// Handles data specific to a physical device.
 class VKDevice final {
 public:
-    explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
+    explicit VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical,
                       vk::SurfaceKHR surface);
     ~VKDevice();
 
     /// Initializes the device. Returns true on success.
-    bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
+    bool Create(vk::Instance instance);
 
     /**
      * Returns a format supported by the device for the passed requeriments.
@@ -188,18 +188,18 @@ public:
     }
 
     /// Checks if the physical device is suitable.
-    static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
-                           vk::SurfaceKHR surface);
+    static bool IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface,
+                           const vk::DispatchLoaderDynamic& dld);
 
 private:
     /// Loads extensions into a vector and stores available ones in this object.
-    std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi);
+    std::vector<const char*> LoadExtensions();
 
     /// Sets up queue families.
-    void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
+    void SetupFamilies(vk::SurfaceKHR surface);
 
     /// Sets up device features.
-    void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
+    void SetupFeatures();
 
     /// Collects telemetry information from the device.
     void CollectTelemetryParameters();
@@ -208,8 +208,7 @@ private:
     std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
 
     /// Returns true if ASTC textures are natively supported.
-    bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
-                                const vk::DispatchLoaderDynamic& dldi) const;
+    bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const;
 
     /// Returns true if a format is supported.
     bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
@@ -217,10 +216,10 @@ private:
 
     /// Returns the device properties for Vulkan formats.
     static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
-        const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
+        const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical);
 
-    const vk::PhysicalDevice physical;        ///< Physical device.
     vk::DispatchLoaderDynamic dld;            ///< Device function pointers.
+    vk::PhysicalDevice physical;              ///< Physical device.
     vk::PhysicalDeviceProperties properties;  ///< Device properties.
     UniqueDevice logical;                     ///< Logical device.
     vk::Queue graphics_queue;                 ///< Main graphics queue.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 557b9d662..c2a426aeb 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
 } // Anonymous namespace
 
 CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
-                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
-                           ProgramCode program_code, u32 main_offset)
-    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
-      program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
-      shader_ir{this->program_code, main_offset, compiler_settings, registry},
+                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
+                           u32 main_offset)
+    : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
+      registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
+                                                           compiler_settings, registry},
       entries{GenerateShaderEntries(shader_ir)} {}
 
 CachedShader::~CachedShader() = default;
@@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
 
         auto& memory_manager{system.GPU().MemoryManager()};
         const GPUVAddr program_addr{GetShaderAddress(system, program)};
-        const auto host_ptr{memory_manager.GetPointer(program_addr)};
-        auto shader = TryGet(host_ptr);
+        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+        ASSERT(cpu_addr);
+        auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
         if (!shader) {
+            const auto host_ptr{memory_manager.GetPointer(program_addr)};
+
             // No shader found - create a new one
             constexpr u32 stage_offset = 10;
             const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
             auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
 
-            const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
-            ASSERT(cpu_addr);
-
             shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
-                                                    host_ptr, std::move(code), stage_offset);
+                                                    std::move(code), stage_offset);
             Register(shader);
         }
         shaders[index] = std::move(shader);
@@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 
     auto& memory_manager = system.GPU().MemoryManager();
     const auto program_addr = key.shader;
-    const auto host_ptr = memory_manager.GetPointer(program_addr);
 
-    auto shader = TryGet(host_ptr);
+    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+    ASSERT(cpu_addr);
+
+    auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
     if (!shader) {
         // No shader found - create a new one
-        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
-        ASSERT(cpu_addr);
+        const auto host_ptr = memory_manager.GetPointer(program_addr);
 
         auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
         constexpr u32 kernel_main_offset = 0;
         shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
-                                                program_addr, *cpu_addr, host_ptr, std::move(code),
+                                                program_addr, *cpu_addr, std::move(code),
                                                 kernel_main_offset);
         Register(shader);
     }
@@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
         }
 
         const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-        const auto shader = TryGet(host_ptr);
+        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+        ASSERT(cpu_addr);
+        const auto shader = TryGet(*cpu_addr);
         ASSERT(shader);
 
         const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c4c112290..27c01732f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -113,17 +113,13 @@ namespace Vulkan {
 class CachedShader final : public RasterizerCacheObject {
 public:
     explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-                          VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
+                          VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
     ~CachedShader();
 
     GPUVAddr GetGpuAddr() const {
         return gpu_addr;
     }
 
-    VAddr GetCpuAddr() const override {
-        return cpu_addr;
-    }
-
     std::size_t GetSizeInBytes() const override {
         return program_code.size() * sizeof(u64);
     }
@@ -149,7 +145,6 @@ private:
                                                                  Tegra::Engines::ShaderType stage);
 
     GPUVAddr gpu_addr{};
-    VAddr cpu_addr{};
     ProgramCode program_code;
     VideoCommon::Shader::Registry registry;
     VideoCommon::Shader::ShaderIR shader_ir;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 58c69b786..0a2ea4fd4 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
 
 void RasterizerVulkan::FlushAll() {}
 
-void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
+void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
+    if (addr == 0 || size == 0) {
+        return;
+    }
     texture_cache.FlushRegion(addr, size);
     buffer_cache.FlushRegion(addr, size);
     query_cache.FlushRegion(addr, size);
 }
 
-void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
+void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
+    if (addr == 0 || size == 0) {
+        return;
+    }
     texture_cache.InvalidateRegion(addr, size);
     pipeline_cache.InvalidateRegion(addr, size);
     buffer_cache.InvalidateRegion(addr, size);
     query_cache.InvalidateRegion(addr, size);
 }
 
-void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     FlushRegion(addr, size);
     InvalidateRegion(addr, size);
 }
@@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
         return false;
     }
 
-    const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
-    const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
+    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
     if (!surface) {
         return false;
     }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 3185868e9..f642dde76 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -118,9 +118,9 @@ public:
     void ResetCounter(VideoCore::QueryType type) override;
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 51ecb5567..b9f9e2714 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -35,7 +35,7 @@ namespace {
 using Sirit::Id;
 using Tegra::Engines::ShaderType;
 using Tegra::Shader::Attribute;
-using Tegra::Shader::AttributeUse;
+using Tegra::Shader::PixelImap;
 using Tegra::Shader::Register;
 using namespace VideoCommon::Shader;
 
@@ -752,16 +752,16 @@ private:
             if (stage != ShaderType::Fragment) {
                 continue;
             }
-            switch (header.ps.GetAttributeUse(location)) {
-            case AttributeUse::Constant:
+            switch (header.ps.GetPixelImap(location)) {
+            case PixelImap::Constant:
                 Decorate(id, spv::Decoration::Flat);
                 break;
-            case AttributeUse::ScreenLinear:
-                Decorate(id, spv::Decoration::NoPerspective);
-                break;
-            case AttributeUse::Perspective:
+            case PixelImap::Perspective:
                 // Default
                 break;
+            case PixelImap::ScreenLinear:
+                Decorate(id, spv::Decoration::NoPerspective);
+                break;
             default:
                 UNREACHABLE_MSG("Unused attribute being fetched");
             }
@@ -1145,9 +1145,6 @@ private:
             switch (attribute) {
             case Attribute::Index::Position: {
                 if (stage == ShaderType::Fragment) {
-                    if (element == 3) {
-                        return {Constant(t_float, 1.0f), Type::Float};
-                    }
                     return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
                             Type::Float};
                 }
@@ -1941,7 +1938,11 @@ private:
         return {};
     }
 
-    Expression AtomicAdd(Operation operation) {
+    template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type,
+              Type value_type = result_type>
+    Expression Atomic(Operation operation) {
+        const Id type_def = GetTypeDefinition(result_type);
+
         Id pointer;
         if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
             pointer = GetSharedMemoryPointer(*smem);
@@ -1949,14 +1950,15 @@ private:
             pointer = GetGlobalMemoryPointer(*gmem);
         } else {
             UNREACHABLE();
-            return {Constant(t_uint, 0), Type::Uint};
+            return {Constant(type_def, 0), result_type};
         }
 
+        const Id value = As(Visit(operation[1]), value_type);
+
         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
-        const Id semantics = Constant(t_uint, 0U);
+        const Id semantics = Constant(type_def, 0);
 
-        const Id value = AsUint(Visit(operation[1]));
-        return {OpAtomicIAdd(t_uint, pointer, scope, semantics, value), Type::Uint};
+        return {(this->*func)(type_def, pointer, scope, semantics, value), result_type};
     }
 
     Expression Branch(Operation operation) {
@@ -2545,7 +2547,21 @@ private:
         &SPIRVDecompiler::AtomicImageXor,
         &SPIRVDecompiler::AtomicImageExchange,
 
-        &SPIRVDecompiler::AtomicAdd,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>,
+
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>,
 
         &SPIRVDecompiler::Branch,
         &SPIRVDecompiler::BranchIndirect,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 26175921b..5b9b39670 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -35,7 +35,6 @@ using VideoCore::MortonSwizzleMode;
 
 using Tegra::Texture::SwizzleSource;
 using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceCompression;
 using VideoCore::Surface::SurfaceTarget;
 
 namespace {
@@ -96,9 +95,10 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) {
     return {};
 }
 
-UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
+UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
+                          std::size_t host_memory_size) {
     // TODO(Rodrigo): Move texture buffer creation to the buffer cache
-    const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(),
+    const vk::BufferCreateInfo buffer_ci({}, host_memory_size,
                                          vk::BufferUsageFlagBits::eUniformTexelBuffer |
                                              vk::BufferUsageFlagBits::eTransferSrc |
                                              vk::BufferUsageFlagBits::eTransferDst,
@@ -110,12 +110,13 @@ UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
 
 vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
                                                       const SurfaceParams& params,
-                                                      vk::Buffer buffer) {
+                                                      vk::Buffer buffer,
+                                                      std::size_t host_memory_size) {
     ASSERT(params.IsBuffer());
 
     const auto format =
         MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
-    return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes());
+    return vk::BufferViewCreateInfo({}, buffer, format, 0, host_memory_size);
 }
 
 vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
@@ -169,14 +170,15 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
                              VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
                              VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
                              GPUVAddr gpu_addr, const SurfaceParams& params)
-    : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
-      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
-      staging_pool{staging_pool} {
+    : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
+      device{device}, resource_manager{resource_manager},
+      memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
     if (params.IsBuffer()) {
-        buffer = CreateBuffer(device, params);
+        buffer = CreateBuffer(device, params, host_memory_size);
         commit = memory_manager.Commit(*buffer, false);
 
-        const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
+        const auto buffer_view_ci =
+            GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
         format = buffer_view_ci.format;
 
         const auto dev = device.GetLogical();
@@ -255,7 +257,7 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
     std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
 
     scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
-                      size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) {
+                      size = host_memory_size](auto cmdbuf, auto& dld) {
         const vk::BufferCopy copy(0, 0, size);
         cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);
 
@@ -299,10 +301,7 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
 
 vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
     const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
-    const auto compression_type = params.GetCompressionType();
-    const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
-                                       ? params.GetConvertedMipmapOffset(level)
-                                       : params.GetHostMipmapLevelOffset(level);
+    const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
 
     return vk::BufferImageCopy(
         mip_offset, 0, 0,
@@ -390,8 +389,9 @@ VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterf
                                const VKDevice& device, VKResourceManager& resource_manager,
                                VKMemoryManager& memory_manager, VKScheduler& scheduler,
                                VKStagingBufferPool& staging_pool)
-    : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
-      memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
+    : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
+      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
+      staging_pool{staging_pool} {}
 
 VKTextureCache::~VKTextureCache() = default;
 
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
new file mode 100644
index 000000000..9b94dfff1
--- /dev/null
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -0,0 +1,750 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <exception>
+#include <memory>
+#include <optional>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan::vk {
+
+namespace {
+
+template <typename T>
+bool Proc(T& result, const InstanceDispatch& dld, const char* proc_name,
+          VkInstance instance = nullptr) noexcept {
+    const auto address = dld.vkGetInstanceProcAddr(instance, proc_name); // lookup by name
+    return (result = reinterpret_cast<T>(address)) != nullptr; // true only if it resolved
+}
+
+template <typename T> // Stores the vkGetDeviceProcAddr lookup of proc_name in result.
+void Proc(T& result, const DeviceDispatch& dld, const char* proc_name, VkDevice device) noexcept {
+    result = reinterpret_cast<T>(dld.vkGetDeviceProcAddr(device, proc_name)); // unchecked
+}
+
+void Load(VkDevice device, DeviceDispatch& dld) noexcept { // Fills dld's device-level entries.
+#define X(name) Proc(dld.name, dld, #name, device) // looks dld.name up by its own name
+    X(vkAcquireNextImageKHR); // lookups are unchecked: this Proc overload returns void
+    X(vkAllocateCommandBuffers);
+    X(vkAllocateDescriptorSets);
+    X(vkAllocateMemory);
+    X(vkBeginCommandBuffer);
+    X(vkBindBufferMemory);
+    X(vkBindImageMemory);
+    X(vkCmdBeginQuery);
+    X(vkCmdBeginRenderPass);
+    X(vkCmdBeginTransformFeedbackEXT);
+    X(vkCmdBindDescriptorSets);
+    X(vkCmdBindIndexBuffer);
+    X(vkCmdBindPipeline);
+    X(vkCmdBindTransformFeedbackBuffersEXT);
+    X(vkCmdBindVertexBuffers);
+    X(vkCmdBlitImage);
+    X(vkCmdClearAttachments);
+    X(vkCmdCopyBuffer);
+    X(vkCmdCopyBufferToImage);
+    X(vkCmdCopyImage);
+    X(vkCmdCopyImageToBuffer);
+    X(vkCmdDispatch);
+    X(vkCmdDraw);
+    X(vkCmdDrawIndexed);
+    X(vkCmdEndQuery);
+    X(vkCmdEndRenderPass);
+    X(vkCmdEndTransformFeedbackEXT);
+    X(vkCmdFillBuffer);
+    X(vkCmdPipelineBarrier);
+    X(vkCmdPushConstants);
+    X(vkCmdSetBlendConstants);
+    X(vkCmdSetCheckpointNV);
+    X(vkCmdSetDepthBias);
+    X(vkCmdSetDepthBounds);
+    X(vkCmdSetScissor);
+    X(vkCmdSetStencilCompareMask);
+    X(vkCmdSetStencilReference);
+    X(vkCmdSetStencilWriteMask);
+    X(vkCmdSetViewport);
+    X(vkCreateBuffer);
+    X(vkCreateBufferView);
+    X(vkCreateCommandPool);
+    X(vkCreateComputePipelines);
+    X(vkCreateDescriptorPool);
+    X(vkCreateDescriptorSetLayout);
+    X(vkCreateDescriptorUpdateTemplateKHR);
+    X(vkCreateFence);
+    X(vkCreateFramebuffer);
+    X(vkCreateGraphicsPipelines);
+    X(vkCreateImage);
+    X(vkCreateImageView);
+    X(vkCreatePipelineLayout);
+    X(vkCreateQueryPool);
+    X(vkCreateRenderPass);
+    X(vkCreateSampler);
+    X(vkCreateSemaphore);
+    X(vkCreateShaderModule);
+    X(vkCreateSwapchainKHR);
+    X(vkDestroyBuffer);
+    X(vkDestroyBufferView);
+    X(vkDestroyCommandPool);
+    X(vkDestroyDescriptorPool);
+    X(vkDestroyDescriptorSetLayout);
+    X(vkDestroyDescriptorUpdateTemplateKHR);
+    X(vkDestroyFence);
+    X(vkDestroyFramebuffer);
+    X(vkDestroyImage);
+    X(vkDestroyImageView);
+    X(vkDestroyPipeline);
+    X(vkDestroyPipelineLayout);
+    X(vkDestroyQueryPool);
+    X(vkDestroyRenderPass);
+    X(vkDestroySampler);
+    X(vkDestroySemaphore);
+    X(vkDestroyShaderModule);
+    X(vkDestroySwapchainKHR);
+    X(vkDeviceWaitIdle);
+    X(vkEndCommandBuffer);
+    X(vkFreeCommandBuffers);
+    X(vkFreeDescriptorSets);
+    X(vkFreeMemory);
+    X(vkGetBufferMemoryRequirements);
+    X(vkGetDeviceQueue);
+    X(vkGetFenceStatus);
+    X(vkGetImageMemoryRequirements);
+    X(vkGetQueryPoolResults);
+    X(vkGetQueueCheckpointDataNV);
+    X(vkMapMemory);
+    X(vkQueueSubmit);
+    X(vkResetFences);
+    X(vkResetQueryPoolEXT);
+    X(vkUnmapMemory);
+    X(vkUpdateDescriptorSetWithTemplateKHR);
+    X(vkUpdateDescriptorSets);
+    X(vkWaitForFences);
+#undef X // the helper macro is local to this function
+}
+
+} // Anonymous namespace
+
+bool Load(InstanceDispatch& dld) noexcept {
+    return Proc(dld.vkCreateInstance, dld, "vkCreateInstance") && // instance left as nullptr
+           Proc(dld.vkEnumerateInstanceExtensionProperties, dld,
+                "vkEnumerateInstanceExtensionProperties");
+}
+
+bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
+#define X(name) Proc(dld.name, dld, #name, instance)
+    // Extension entry points: whether they load depends on the enabled extensions, so a
+    // failed lookup here is deliberately not reported as a failure.
+    X(vkCreateDebugUtilsMessengerEXT);
+    X(vkDestroyDebugUtilsMessengerEXT);
+    X(vkDestroySurfaceKHR);
+    X(vkGetPhysicalDeviceFeatures2KHR);
+    X(vkGetPhysicalDeviceProperties2KHR);
+    X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR);
+    X(vkGetPhysicalDeviceSurfaceFormatsKHR);
+    X(vkGetPhysicalDeviceSurfacePresentModesKHR);
+    X(vkGetPhysicalDeviceSurfaceSupportKHR);
+    X(vkGetSwapchainImagesKHR);
+    X(vkQueuePresentKHR);
+    // Mandatory entry points; stops at, and reports, the first one that fails to resolve.
+    // NOTE(review): vkDestroyInstance is not loaded here - confirm it is resolved elsewhere.
+    return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkEnumerateDeviceExtensionProperties) &&
+           X(vkEnumeratePhysicalDevices) && X(vkGetDeviceProcAddr) &&
+           X(vkGetPhysicalDeviceFormatProperties) && X(vkGetPhysicalDeviceMemoryProperties) &&
+           X(vkGetPhysicalDeviceProperties) && X(vkGetPhysicalDeviceQueueFamilyProperties);
+#undef X
+}
+
+const char* Exception::what() const noexcept {
+    return ToString(result); // ToString hands back string literals, so nothing is owned
+}
+
+const char* ToString(VkResult result) noexcept { // Returns the enumerator's name as a literal.
+    switch (result) {
+    case VkResult::VK_SUCCESS:
+        return "VK_SUCCESS";
+    case VkResult::VK_NOT_READY:
+        return "VK_NOT_READY";
+    case VkResult::VK_TIMEOUT:
+        return "VK_TIMEOUT";
+    case VkResult::VK_EVENT_SET:
+        return "VK_EVENT_SET";
+    case VkResult::VK_EVENT_RESET:
+        return "VK_EVENT_RESET";
+    case VkResult::VK_INCOMPLETE:
+        return "VK_INCOMPLETE";
+    case VkResult::VK_ERROR_OUT_OF_HOST_MEMORY:
+        return "VK_ERROR_OUT_OF_HOST_MEMORY";
+    case VkResult::VK_ERROR_OUT_OF_DEVICE_MEMORY:
+        return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
+    case VkResult::VK_ERROR_INITIALIZATION_FAILED:
+        return "VK_ERROR_INITIALIZATION_FAILED";
+    case VkResult::VK_ERROR_DEVICE_LOST:
+        return "VK_ERROR_DEVICE_LOST";
+    case VkResult::VK_ERROR_MEMORY_MAP_FAILED:
+        return "VK_ERROR_MEMORY_MAP_FAILED";
+    case VkResult::VK_ERROR_LAYER_NOT_PRESENT:
+        return "VK_ERROR_LAYER_NOT_PRESENT";
+    case VkResult::VK_ERROR_EXTENSION_NOT_PRESENT:
+        return "VK_ERROR_EXTENSION_NOT_PRESENT";
+    case VkResult::VK_ERROR_FEATURE_NOT_PRESENT:
+        return "VK_ERROR_FEATURE_NOT_PRESENT";
+    case VkResult::VK_ERROR_INCOMPATIBLE_DRIVER:
+        return "VK_ERROR_INCOMPATIBLE_DRIVER";
+    case VkResult::VK_ERROR_TOO_MANY_OBJECTS:
+        return "VK_ERROR_TOO_MANY_OBJECTS";
+    case VkResult::VK_ERROR_FORMAT_NOT_SUPPORTED:
+        return "VK_ERROR_FORMAT_NOT_SUPPORTED";
+    case VkResult::VK_ERROR_FRAGMENTED_POOL:
+        return "VK_ERROR_FRAGMENTED_POOL";
+    case VkResult::VK_ERROR_OUT_OF_POOL_MEMORY:
+        return "VK_ERROR_OUT_OF_POOL_MEMORY";
+    case VkResult::VK_ERROR_INVALID_EXTERNAL_HANDLE:
+        return "VK_ERROR_INVALID_EXTERNAL_HANDLE";
+    case VkResult::VK_ERROR_SURFACE_LOST_KHR:
+        return "VK_ERROR_SURFACE_LOST_KHR";
+    case VkResult::VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
+        return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR";
+    case VkResult::VK_SUBOPTIMAL_KHR:
+        return "VK_SUBOPTIMAL_KHR";
+    case VkResult::VK_ERROR_OUT_OF_DATE_KHR:
+        return "VK_ERROR_OUT_OF_DATE_KHR";
+    case VkResult::VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
+        return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR";
+    case VkResult::VK_ERROR_VALIDATION_FAILED_EXT:
+        return "VK_ERROR_VALIDATION_FAILED_EXT";
+    case VkResult::VK_ERROR_INVALID_SHADER_NV:
+        return "VK_ERROR_INVALID_SHADER_NV";
+    case VkResult::VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
+        return "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT";
+    case VkResult::VK_ERROR_FRAGMENTATION_EXT:
+        return "VK_ERROR_FRAGMENTATION_EXT";
+    case VkResult::VK_ERROR_NOT_PERMITTED_EXT:
+        return "VK_ERROR_NOT_PERMITTED_EXT";
+    case VkResult::VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
+        return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT";
+    case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
+        return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
+    }
+    return "Unknown"; // result matched none of the cases above
+}
+
+void Destroy(VkInstance instance, const InstanceDispatch& dld) noexcept {
+    dld.vkDestroyInstance(instance, nullptr); // nullptr: no custom allocation callbacks.
+}
+
+void Destroy(VkDevice device, const InstanceDispatch& dld) noexcept {
+    dld.vkDestroyDevice(device, nullptr); // vkDestroyDevice lives in the instance-level table.
+}
+
+void Destroy(VkDevice device, VkBuffer handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyBuffer(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkBufferView handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyBufferView(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkCommandPool handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyCommandPool(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkDescriptorPool handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyDescriptorPool(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkDescriptorSetLayout handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyDescriptorSetLayout(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkDescriptorUpdateTemplateKHR handle,
+             const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyDescriptorUpdateTemplateKHR(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) noexcept {
+    dld.vkFreeMemory(device, handle, nullptr); // Device memory is freed, not destroyed.
+}
+
+void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyFence(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkFramebuffer handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyFramebuffer(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkImage handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyImage(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkImageView handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyImageView(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyPipeline(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyPipelineLayout(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkQueryPool handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyQueryPool(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkRenderPass handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyRenderPass(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkSampler handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroySampler(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkSwapchainKHR handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroySwapchainKHR(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkSemaphore handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroySemaphore(device, handle, nullptr);
+}
+
+void Destroy(VkDevice device, VkShaderModule handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyShaderModule(device, handle, nullptr);
+}
+
+void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle,
+             const InstanceDispatch& dld) noexcept { // Instance-owned: needs the VkInstance.
+    dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr);
+}
+
+void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept {
+    dld.vkDestroySurfaceKHR(instance, handle, nullptr); // Surfaces are instance-owned as well.
+}
+
+VkResult Free(VkDevice device, VkDescriptorPool handle, Span<VkDescriptorSet> sets,
+              const DeviceDispatch& dld) noexcept { // Hands the call's VkResult to the caller.
+    return dld.vkFreeDescriptorSets(device, handle, sets.size(), sets.data());
+}
+
+VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffers,
+              const DeviceDispatch& dld) noexcept {
+    dld.vkFreeCommandBuffers(device, handle, buffers.size(), buffers.data());
+    return VK_SUCCESS; // Nothing from the call above is propagated; success is always reported.
+}
+
+Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions,
+                          InstanceDispatch& dld) noexcept {
+    VkApplicationInfo app_info;
+    app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+    app_info.pNext = nullptr;
+    app_info.pApplicationName = "yuzu Emulator";
+    app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
+    app_info.pEngineName = "yuzu Emulator";
+    app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
+    app_info.apiVersion = VK_API_VERSION_1_1;
+    // Describe the instance: the requested layers and extensions plus the info filled in above.
+    VkInstanceCreateInfo create_info;
+    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+    create_info.pNext = nullptr;
+    create_info.flags = 0;
+    create_info.pApplicationInfo = &app_info;
+    create_info.enabledLayerCount = layers.size();
+    create_info.ppEnabledLayerNames = layers.data();
+    create_info.enabledExtensionCount = extensions.size();
+    create_info.ppEnabledExtensionNames = extensions.data();
+    // Both failure paths below return an empty Instance; nothing is thrown from here.
+    VkInstance created;
+    if (dld.vkCreateInstance(&create_info, nullptr, &created) != VK_SUCCESS) {
+        return {}; // The instance could not be created.
+    }
+    // The instance exists at this point, but if the function needed to destroy it cannot be
+    // loaded there is nothing sensible left to do, so this is reported as a failure too.
+    if (!Proc(dld.vkDestroyInstance, dld, "vkDestroyInstance", created)) {
+        return {};
+    }
+
+    return Instance(created, dld);
+}
+
+std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() {
+    u32 count; // Set by the size query; only read once that query has succeeded.
+    if (dld->vkEnumeratePhysicalDevices(handle, &count, nullptr) != VK_SUCCESS) {
+        return std::nullopt;
+    }
+    std::vector<VkPhysicalDevice> devices(count);
+    if (dld->vkEnumeratePhysicalDevices(handle, &count, devices.data()) != VK_SUCCESS) {
+        return std::nullopt; // Any non-success result from the fill is treated as failure.
+    }
+    return devices;
+}
+
+DebugCallback Instance::TryCreateDebugCallback(
+    PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
+    VkDebugUtilsMessengerCreateInfoEXT create_info;
+    create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
+    create_info.pNext = nullptr;
+    create_info.flags = 0;
+    create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
+                                  VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+                                  VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+                                  VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
+    create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+                              VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+                              VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
+    create_info.pfnUserCallback = callback;
+    create_info.pUserData = nullptr;
+    // Every severity and message type listed above is enabled; failure yields an empty object.
+    VkDebugUtilsMessengerEXT object;
+    if (dld->vkCreateDebugUtilsMessengerEXT(handle, &create_info, nullptr, &object) != VK_SUCCESS) {
+        return {};
+    }
+    return DebugCallback(object, handle, *dld);
+}
+
+std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const {
+    if (!dld.vkGetQueueCheckpointDataNV) {
+        return {}; // Entry point not loaded: no checkpoint data can be queried.
+    }
+    u32 num = 0;
+    dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr);
+    std::vector<VkCheckpointDataNV> data(num, {VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV, nullptr});
+    dld.vkGetQueueCheckpointDataNV(queue, &num, data.data()); // Output structs need sType preset.
+    return data;
+}
+
+void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
+    Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); // Throws on failure.
+}
+
+void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
+    Check(dld->vkBindImageMemory(owner, handle, memory, offset)); // Throws on failure.
+}
+
+DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {
+    const std::size_t count = ai.descriptorSetCount;
+    std::unique_ptr storage = std::make_unique<VkDescriptorSet[]>(count);
+    const VkResult result = dld->vkAllocateDescriptorSets(owner, &ai, storage.get());
+    if (result == VK_SUCCESS) {
+        return DescriptorSets(std::move(storage), count, owner, handle, *dld);
+    }
+    if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
+        return {}; // Pool exhaustion is reported as an empty object instead of an exception.
+    }
+    throw Exception(result); // Every other result is raised to the caller.
+}
+
+CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
+    VkCommandBufferAllocateInfo alloc_info;
+    alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+    alloc_info.pNext = nullptr;
+    alloc_info.commandPool = handle;
+    alloc_info.level = level;
+    alloc_info.commandBufferCount = static_cast<u32>(num_buffers);
+    // The handles are written into heap storage that the returned object takes ownership of.
+    std::unique_ptr storage = std::make_unique<VkCommandBuffer[]>(num_buffers);
+    const VkResult result = dld->vkAllocateCommandBuffers(owner, &alloc_info, storage.get());
+    if (result == VK_SUCCESS) {
+        return CommandBuffers(std::move(storage), num_buffers, owner, handle, *dld);
+    }
+    if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
+        return {}; // Reported as an empty object instead of an exception.
+    }
+    throw Exception(result);
+}
+
+std::vector<VkImage> SwapchainKHR::GetImages() const {
+    u32 count; // Written by the size query; Check throws before it is read on failure.
+    Check(dld->vkGetSwapchainImagesKHR(owner, handle, &count, nullptr));
+    std::vector<VkImage> swapchain_images(count);
+    Check(dld->vkGetSwapchainImagesKHR(owner, handle, &count, swapchain_images.data()));
+    return swapchain_images;
+}
+
+Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
+                      Span<const char*> enabled_extensions,
+                      const VkPhysicalDeviceFeatures2& enabled_features,
+                      DeviceDispatch& dld) noexcept {
+    VkDeviceCreateInfo create_info;
+    create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+    create_info.pNext = &enabled_features; // Features are supplied through the pNext chain...
+    create_info.flags = 0;
+    create_info.queueCreateInfoCount = queues_ci.size();
+    create_info.pQueueCreateInfos = queues_ci.data();
+    create_info.enabledLayerCount = 0;
+    create_info.ppEnabledLayerNames = nullptr;
+    create_info.enabledExtensionCount = enabled_extensions.size();
+    create_info.ppEnabledExtensionNames = enabled_extensions.data();
+    create_info.pEnabledFeatures = nullptr; // ...so the plain features pointer stays null.
+    // Creation failure is reported by returning an empty Device.
+    VkDevice created;
+    if (dld.vkCreateDevice(physical_device, &create_info, nullptr, &created) != VK_SUCCESS) {
+        return {};
+    }
+    Load(created, dld); // Presumably fills dld's device-level entries; defined outside this chunk.
+    return Device(created, dld);
+}
+
+Queue Device::GetQueue(u32 family_index) const noexcept {
+    VkQueue queue;
+    dld->vkGetDeviceQueue(handle, family_index, 0, &queue); // Always asks for queue index 0.
+    return Queue(queue, *dld);
+}
+
+Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const {
+    VkBuffer object; // Same shape for every factory below: create, Check, wrap with the owner.
+    Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object)); // Throws Exception on failure.
+    return Buffer(object, handle, *dld);
+}
+
+BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const {
+    VkBufferView object;
+    Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object));
+    return BufferView(object, handle, *dld);
+}
+
+Image Device::CreateImage(const VkImageCreateInfo& ci) const {
+    VkImage object;
+    Check(dld->vkCreateImage(handle, &ci, nullptr, &object));
+    return Image(object, handle, *dld);
+}
+
+ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
+    VkImageView object;
+    Check(dld->vkCreateImageView(handle, &ci, nullptr, &object));
+    return ImageView(object, handle, *dld);
+}
+
+Semaphore Device::CreateSemaphore() const {
+    VkSemaphoreCreateInfo ci; // Built locally: no flags and no extension chain.
+    ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+
+    VkSemaphore object;
+    Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object));
+    return Semaphore(object, handle, *dld);
+}
+
+Fence Device::CreateFence(const VkFenceCreateInfo& ci) const {
+    VkFence object;
+    Check(dld->vkCreateFence(handle, &ci, nullptr, &object));
+    return Fence(object, handle, *dld);
+}
+
+DescriptorPool Device::CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const {
+    VkDescriptorPool object;
+    Check(dld->vkCreateDescriptorPool(handle, &ci, nullptr, &object));
+    return DescriptorPool(object, handle, *dld);
+}
+
+RenderPass Device::CreateRenderPass(const VkRenderPassCreateInfo& ci) const {
+    VkRenderPass object;
+    Check(dld->vkCreateRenderPass(handle, &ci, nullptr, &object));
+    return RenderPass(object, handle, *dld);
+}
+
+DescriptorSetLayout Device::CreateDescriptorSetLayout(
+    const VkDescriptorSetLayoutCreateInfo& ci) const {
+    VkDescriptorSetLayout object;
+    Check(dld->vkCreateDescriptorSetLayout(handle, &ci, nullptr, &object));
+    return DescriptorSetLayout(object, handle, *dld);
+}
+
+PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const {
+    VkPipelineLayout object;
+    Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object));
+    return PipelineLayout(object, handle, *dld);
+}
+
+Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const {
+    VkPipeline object; // The call below builds exactly one pipeline and passes no pipeline cache.
+    Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object));
+    return Pipeline(object, handle, *dld);
+}
+
+Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const {
+    VkPipeline object; // The call below builds exactly one pipeline and passes no pipeline cache.
+    Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object));
+    return Pipeline(object, handle, *dld);
+}
+
+Sampler Device::CreateSampler(const VkSamplerCreateInfo& ci) const {
+    VkSampler object;
+    Check(dld->vkCreateSampler(handle, &ci, nullptr, &object));
+    return Sampler(object, handle, *dld);
+}
+
+Framebuffer Device::CreateFramebuffer(const VkFramebufferCreateInfo& ci) const {
+    VkFramebuffer object;
+    Check(dld->vkCreateFramebuffer(handle, &ci, nullptr, &object));
+    return Framebuffer(object, handle, *dld);
+}
+
+CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const {
+    VkCommandPool object;
+    Check(dld->vkCreateCommandPool(handle, &ci, nullptr, &object));
+    return CommandPool(object, handle, *dld);
+}
+
+DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR(
+    const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const {
+    VkDescriptorUpdateTemplateKHR object;
+    Check(dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &object));
+    return DescriptorUpdateTemplateKHR(object, handle, *dld);
+}
+
+QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const {
+    VkQueryPool object;
+    Check(dld->vkCreateQueryPool(handle, &ci, nullptr, &object));
+    return QueryPool(object, handle, *dld);
+}
+
+ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) const {
+    VkShaderModule object;
+    Check(dld->vkCreateShaderModule(handle, &ci, nullptr, &object));
+    return ShaderModule(object, handle, *dld);
+}
+
+SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
+    VkSwapchainKHR object;
+    Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
+    return SwapchainKHR(object, handle, *dld);
+}
+
+DeviceMemory Device::TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept {
+    VkDeviceMemory memory;
+    if (dld->vkAllocateMemory(handle, &ai, nullptr, &memory) != VK_SUCCESS) {
+        return {}; // Any non-success result yields an empty DeviceMemory instead of throwing.
+    }
+    return DeviceMemory(memory, handle, *dld);
+}
+
+DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
+    VkDeviceMemory memory;
+    Check(dld->vkAllocateMemory(handle, &ai, nullptr, &memory)); // Throws Exception on failure.
+    return DeviceMemory(memory, handle, *dld);
+}
+
+VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept {
+    VkMemoryRequirements requirements; // Output structure handed to the query below.
+    dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements);
+    return requirements;
+}
+
+VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
+    VkMemoryRequirements requirements; // Output structure handed to the query below.
+    dld->vkGetImageMemoryRequirements(handle, image, &requirements);
+    return requirements;
+}
+
+void Device::UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes, // Both spans pass as-is.
+                                  Span<VkCopyDescriptorSet> copies) const noexcept {
+    dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data());
+}
+
+VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept {
+    VkPhysicalDeviceProperties properties; // Output structure handed to the query below.
+    dld->vkGetPhysicalDeviceProperties(physical_device, &properties);
+    return properties;
+}
+
+void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept {
+    dld->vkGetPhysicalDeviceProperties2KHR(physical_device, &properties); // Caller's struct as-is.
+}
+
+VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept {
+    VkPhysicalDeviceFeatures2KHR query; // Only the header is set here before the call.
+    query.pNext = nullptr;
+    query.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
+    dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &query);
+    return query.features; // Only the embedded core feature block is handed back.
+}
+
+void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept {
+    dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features); // Caller's struct as-is.
+}
+
+VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept {
+    VkFormatProperties properties; // Output structure handed to the query below.
+    dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties);
+    return properties;
+}
+
+std::vector<VkExtensionProperties> PhysicalDevice::EnumerateDeviceExtensionProperties() const {
+    u32 num = 0; // Defined even if the unchecked count query fails without writing to it.
+    dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr);
+    std::vector<VkExtensionProperties> properties(num);
+    dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, properties.data());
+    return properties; // Results of both calls are ignored; on failure the content is unspecified.
+}
+
+std::vector<VkQueueFamilyProperties> PhysicalDevice::GetQueueFamilyProperties() const {
+    u32 count; // Size query first, then the fill into a vector of that size.
+    dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, nullptr);
+    std::vector<VkQueueFamilyProperties> families(count);
+    dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, families.data());
+    return families;
+}
+
+bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const {
+    VkBool32 supported; // Written by the query; Check throws before it is read on failure.
+    Check(dld->vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index, surface,
+                                                    &supported));
+    return supported == VK_TRUE; // Converts the VkBool32 answer to bool.
+}
+
+VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const
+    noexcept { // NOTE(review): Check() below throws; inside noexcept that ends in terminate.
+    VkSurfaceCapabilitiesKHR capabilities;
+    Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
+    return capabilities;
+}
+
+std::vector<VkSurfaceFormatKHR> PhysicalDevice::GetSurfaceFormatsKHR(VkSurfaceKHR surface) const {
+    u32 count; // Size query first, then the fill; either step throws through Check on failure.
+    Check(dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &count, nullptr));
+    std::vector<VkSurfaceFormatKHR> out(count);
+    Check(
+        dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &count, out.data()));
+    return out;
+}
+
+std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR(
+    VkSurfaceKHR surface) const {
+    u32 len; // Size query first, then the fill; either step throws through Check on failure.
+    Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &len, nullptr));
+    std::vector<VkPresentModeKHR> present_modes(len);
+    Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &len,
+                                                         present_modes.data()));
+    return present_modes;
+}
+
+VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept {
+    VkPhysicalDeviceMemoryProperties properties; // Output structure handed to the query below.
+    dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties);
+    return properties;
+}
+
+std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
+    const InstanceDispatch& dld) {
+    u32 count; // Set by the size query; only read once that query has succeeded.
+    if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &count, nullptr) != VK_SUCCESS) {
+        return std::nullopt;
+    }
+    std::vector<VkExtensionProperties> extensions(count);
+    if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &count, extensions.data()) !=
+        VK_SUCCESS) {
+        return std::nullopt;
+    }
+    return extensions;
+}
+
+} // namespace Vulkan::vk
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
new file mode 100644
index 000000000..fb3657819
--- /dev/null
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -0,0 +1,987 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <exception>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#define VK_NO_PROTOTYPES
+#include <vulkan/vulkan.h>
+
+#include "common/common_types.h"
+
+namespace Vulkan::vk {
+
+/**
+ * Non-owning view over a contiguous array, meant for passing arrays to Vulkan.
+ * Modeled after std::span, but it only exposes read access to the viewed elements.
+ * size() yields u32 instead of std::size_t to ease interaction with Vulkan functions.
+ */
+template <typename T>
+class Span {
+public:
+    using value_type = T;
+    using size_type = u32;
+    using difference_type = std::ptrdiff_t;
+    using pointer = const T*;
+    using const_pointer = const T*;
+    using reference = const T&;
+    using const_reference = const T&;
+    using iterator = const T*;
+    using const_iterator = const T*;
+
+    /// Creates a span that views nothing.
+    constexpr Span() noexcept = default;
+
+    /// Creates a span viewing exactly one element.
+    constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {}
+
+    /// Creates a span viewing every element of a contiguous range.
+    template <typename Range>
+    // requires std::data(const Range&)
+    // requires std::size(const Range&)
+    constexpr Span(const Range& range) : ptr{std::data(range)}, num{std::size(range)} {}
+
+    /// Creates a span from a pointer and an element count.
+    /// This is intended for subranges.
+    constexpr Span(const T* ptr, std::size_t num) noexcept : ptr{ptr}, num{num} {}
+
+    /// Returns the pointer to the first viewed element.
+    constexpr const_pointer data() const noexcept {
+        return ptr;
+    }
+
+    /// Returns the number of viewed elements.
+    /// @note The count is converted to 32 bits because most Vulkan functions expect this type.
+    constexpr size_type size() const noexcept {
+        return static_cast<size_type>(num);
+    }
+
+    /// Tells whether the span views no elements.
+    constexpr bool empty() const noexcept {
+        return num == 0;
+    }
+
+    /// Accesses the element at the passed index.
+    /// @pre: index < size()
+    constexpr const_reference operator[](std::size_t index) const noexcept {
+        return *(ptr + index);
+    }
+
+    /// Iterator to the first element.
+    constexpr const_iterator begin() const noexcept {
+        return ptr;
+    }
+
+    /// Iterator one past the last element.
+    constexpr const_iterator end() const noexcept {
+        return ptr + num;
+    }
+
+    /// Iterator to the first element (same as begin()).
+    constexpr const_iterator cbegin() const noexcept {
+        return begin();
+    }
+
+    /// Iterator one past the last element (same as end()).
+    constexpr const_iterator cend() const noexcept {
+        return end();
+    }
+
+private:
+    const T* ptr = nullptr;
+    std::size_t num = 0;
+};
+
+/// Exception type thrown by this wrapper; it carries the VkResult that caused it.
+class Exception final : public std::exception {
+public:
+    /// Stores the result code that what() later reports by name.
+    /// @pre result != VK_SUCCESS
+    explicit Exception(VkResult result_) : result{result_} {}
+    ~Exception() override = default;
+
+    const char* what() const noexcept override;
+
+private:
+    VkResult result;
+};
+
+/// Returns the enumerator name of a VkResult as a string literal ("Unknown" if not listed).
+const char* ToString(VkResult) noexcept;
+
+/// Throws an Exception carrying result unless result is exactly VK_SUCCESS.
+inline void Check(VkResult result) {
+    if (result != VK_SUCCESS) { // Every other code throws, not just negative ones.
+        throw Exception(result);
+    }
+}
+
+/// Throws an Exception when result is negative; any other value is passed through.
+/// @return result, unchanged
+inline VkResult Filter(VkResult result) {
+    if (result < 0) { // Unlike Check, non-negative codes other than VK_SUCCESS do not throw.
+        throw Exception(result);
+    }
+    return result;
+}
+
+/// Table of instance-level Vulkan function pointers (raw members, no default initializers).
+struct InstanceDispatch {
+    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
+
+    PFN_vkCreateInstance vkCreateInstance;
+    PFN_vkDestroyInstance vkDestroyInstance; // Resolved by Instance::Create after creation.
+    PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
+
+    PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
+    PFN_vkCreateDevice vkCreateDevice;
+    PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT;
+    PFN_vkDestroyDevice vkDestroyDevice;
+    PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
+    PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties;
+    PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices;
+    PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr;
+    PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
+    PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties;
+    PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties;
+    PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties;
+    PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
+    PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties;
+    PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
+    PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
+    PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
+    PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
+    PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR;
+    PFN_vkQueuePresentKHR vkQueuePresentKHR;
+};
+
+/// Table of device-level Vulkan function pointers; it extends InstanceDispatch with them.
+struct DeviceDispatch : public InstanceDispatch {
+    PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR;
+    PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers;
+    PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets;
+    PFN_vkAllocateMemory vkAllocateMemory;
+    PFN_vkBeginCommandBuffer vkBeginCommandBuffer;
+    PFN_vkBindBufferMemory vkBindBufferMemory;
+    PFN_vkBindImageMemory vkBindImageMemory;
+    PFN_vkCmdBeginQuery vkCmdBeginQuery;
+    PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass;
+    PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT;
+    PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets;
+    PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer;
+    PFN_vkCmdBindPipeline vkCmdBindPipeline;
+    PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT;
+    PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers;
+    PFN_vkCmdBlitImage vkCmdBlitImage;
+    PFN_vkCmdClearAttachments vkCmdClearAttachments;
+    PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
+    PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage;
+    PFN_vkCmdCopyImage vkCmdCopyImage;
+    PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer;
+    PFN_vkCmdDispatch vkCmdDispatch;
+    PFN_vkCmdDraw vkCmdDraw;
+    PFN_vkCmdDrawIndexed vkCmdDrawIndexed;
+    PFN_vkCmdEndQuery vkCmdEndQuery;
+    PFN_vkCmdEndRenderPass vkCmdEndRenderPass;
+    PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT;
+    PFN_vkCmdFillBuffer vkCmdFillBuffer;
+    PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
+    PFN_vkCmdPushConstants vkCmdPushConstants;
+    PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
+    PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV;
+    PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
+    PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
+    PFN_vkCmdSetScissor vkCmdSetScissor;
+    PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
+    PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
+    PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask;
+    PFN_vkCmdSetViewport vkCmdSetViewport;
+    PFN_vkCreateBuffer vkCreateBuffer;
+    PFN_vkCreateBufferView vkCreateBufferView;
+    PFN_vkCreateCommandPool vkCreateCommandPool;
+    PFN_vkCreateComputePipelines vkCreateComputePipelines;
+    PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
+    PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
+    PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
+    PFN_vkCreateFence vkCreateFence;
+    PFN_vkCreateFramebuffer vkCreateFramebuffer;
+    PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
+    PFN_vkCreateImage vkCreateImage;
+    PFN_vkCreateImageView vkCreateImageView;
+    PFN_vkCreatePipelineLayout vkCreatePipelineLayout;
+    PFN_vkCreateQueryPool vkCreateQueryPool;
+    PFN_vkCreateRenderPass vkCreateRenderPass;
+    PFN_vkCreateSampler vkCreateSampler;
+    PFN_vkCreateSemaphore vkCreateSemaphore;
+    PFN_vkCreateShaderModule vkCreateShaderModule;
+    PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR;
+    PFN_vkDestroyBuffer vkDestroyBuffer;
+    PFN_vkDestroyBufferView vkDestroyBufferView;
+    PFN_vkDestroyCommandPool vkDestroyCommandPool;
+    PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
+    PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
+    PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
+    PFN_vkDestroyFence vkDestroyFence;
+    PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
+    PFN_vkDestroyImage vkDestroyImage;
+    PFN_vkDestroyImageView vkDestroyImageView;
+    PFN_vkDestroyPipeline vkDestroyPipeline;
+    PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout;
+    PFN_vkDestroyQueryPool vkDestroyQueryPool;
+    PFN_vkDestroyRenderPass vkDestroyRenderPass;
+    PFN_vkDestroySampler vkDestroySampler;
+    PFN_vkDestroySemaphore vkDestroySemaphore;
+    PFN_vkDestroyShaderModule vkDestroyShaderModule;
+    PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR;
+    PFN_vkDeviceWaitIdle vkDeviceWaitIdle;
+    PFN_vkEndCommandBuffer vkEndCommandBuffer;
+    PFN_vkFreeCommandBuffers vkFreeCommandBuffers;
+    PFN_vkFreeDescriptorSets vkFreeDescriptorSets;
+    PFN_vkFreeMemory vkFreeMemory;
+    PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
+    PFN_vkGetDeviceQueue vkGetDeviceQueue;
+    PFN_vkGetFenceStatus vkGetFenceStatus;
+    PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
+    PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
+    PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV; // Null-checked before use.
+    PFN_vkMapMemory vkMapMemory;
+    PFN_vkQueueSubmit vkQueueSubmit;
+    PFN_vkResetFences vkResetFences;
+    PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT;
+    PFN_vkUnmapMemory vkUnmapMemory;
+    PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
+    PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
+    PFN_vkWaitForFences vkWaitForFences;
+};
+
+/// Loads instance agnostic function pointers.
+/// @return True on success, false on error.
+bool Load(InstanceDispatch&) noexcept;
+
+/// Loads instance function pointers.
+/// @return True on success, false on error.
+bool Load(VkInstance, InstanceDispatch&) noexcept;
+
+void Destroy(VkInstance, const InstanceDispatch&) noexcept;
+void Destroy(VkDevice, const InstanceDispatch&) noexcept;
+
+void Destroy(VkDevice, VkBuffer, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkBufferView, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkCommandPool, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkSampler, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkSwapchainKHR, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept;
+void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept;
+void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept;
+
+VkResult Free(VkDevice, VkDescriptorPool, Span<VkDescriptorSet>, const DeviceDispatch&) noexcept;
+VkResult Free(VkDevice, VkCommandPool, Span<VkCommandBuffer>, const DeviceDispatch&) noexcept;
+
+template <typename Type, typename OwnerType, typename Dispatch>
+class Handle;
+
+/// Handle with an owning type.
+/// Analogue to std::unique_ptr.
+template <typename Type, typename OwnerType, typename Dispatch>
+class Handle {
+public:
+    /// Construct a handle and hold its ownership.
+    explicit Handle(Type handle_, OwnerType owner_, const Dispatch& dld_) noexcept
+        : handle{handle_}, owner{owner_}, dld{&dld_} {}
+
+    /// Construct an empty handle.
+    Handle() = default;
+
+    /// Copying Vulkan objects is not supported and will never be.
+    Handle(const Handle&) = delete;
+    Handle& operator=(const Handle&) = delete;
+
+    /// Construct a handle transferring the ownership from another handle.
+    Handle(Handle&& rhs) noexcept
+        : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, dld{rhs.dld} {}
+
+    /// Assign the current handle transferring the ownership from another handle.
+    /// Destroys any previously held object. Self-assignment leaves the handle empty.
+    Handle& operator=(Handle&& rhs) noexcept {
+        Release();
+        handle = std::exchange(rhs.handle, nullptr);
+        owner = rhs.owner;
+        dld = rhs.dld;
+        return *this;
+    }
+
+    /// Destroys the current handle if it existed.
+    ~Handle() noexcept {
+        Release();
+    }
+
+    /// Destroys any held object.
+    void reset() noexcept {
+        Release();
+    }
+
+    /// Returns the address of the held object.
+    /// Intended for Vulkan structures that expect a pointer to an array.
+    const Type* address() const noexcept {
+        return std::addressof(handle);
+    }
+
+    /// Returns the held Vulkan handle.
+    Type operator*() const noexcept {
+        return handle;
+    }
+
+    /// Returns true when there's a held object.
+    explicit operator bool() const noexcept {
+        return handle != nullptr;
+    }
+
+protected:
+    Type handle = nullptr;
+    OwnerType owner = nullptr;
+    const Dispatch* dld = nullptr;
+
+private:
+    /// Destroys the held object if it exists and leaves the handle empty.
+    void Release() noexcept {
+        if (handle) {
+            // Clear the member first so a self move-assignment cannot re-adopt a dead handle.
+            Destroy(owner, std::exchange(handle, nullptr), *dld);
+        }
+    }
+};
+
+/// Dummy type used as the OwnerType argument to specify a handle has no owner.
+struct NoOwner {};
+
+/// Handle without an owning type.
+/// Analogue to std::unique_ptr
+template <typename Type, typename Dispatch>
+class Handle<Type, NoOwner, Dispatch> {
+public:
+    /// Construct a handle and hold its ownership.
+    explicit Handle(Type handle_, const Dispatch& dld_) noexcept : handle{handle_}, dld{&dld_} {}
+
+    /// Construct an empty handle.
+    Handle() noexcept = default;
+
+    /// Copying Vulkan objects is not supported and will never be.
+    Handle(const Handle&) = delete;
+    Handle& operator=(const Handle&) = delete;
+
+    /// Construct a handle transferring ownership from another handle.
+    Handle(Handle&& rhs) noexcept : handle{std::exchange(rhs.handle, nullptr)}, dld{rhs.dld} {}
+
+    /// Assign the current handle transferring the ownership from another handle.
+    /// Destroys any previously held object. Self-assignment leaves the handle empty.
+    Handle& operator=(Handle&& rhs) noexcept {
+        Release();
+        handle = std::exchange(rhs.handle, nullptr);
+        dld = rhs.dld;
+        return *this;
+    }
+
+    /// Destroys the current handle if it existed.
+    ~Handle() noexcept {
+        Release();
+    }
+
+    /// Destroys any held object.
+    void reset() noexcept {
+        Release();
+    }
+
+    /// Returns the address of the held object.
+    /// Intended for Vulkan structures that expect a pointer to an array.
+    const Type* address() const noexcept {
+        return std::addressof(handle);
+    }
+
+    /// Returns the held Vulkan handle.
+    Type operator*() const noexcept {
+        return handle;
+    }
+
+    /// Returns true when there's a held object. NOTE(review): implicit, unlike the owning Handle.
+    operator bool() const noexcept {
+        return handle != nullptr;
+    }
+
+protected:
+    Type handle = nullptr;
+    const Dispatch* dld = nullptr;
+
+private:
+    /// Destroys the held object if it exists and leaves the handle empty.
+    void Release() noexcept {
+        if (handle) {
+            // Clear the member first so a self move-assignment cannot re-adopt a dead handle.
+            Destroy(std::exchange(handle, nullptr), *dld);
+        }
+    }
+};
+
+/// Array of a pool allocation.
+/// Analogue to std::vector
+template <typename AllocationType, typename PoolType>
+class PoolAllocations {
+public:
+    /// Construct an empty allocation.
+    PoolAllocations() = default;
+
+    /// Construct an allocation. Errors are reported through IsOutOfPoolMemory().
+    explicit PoolAllocations(std::unique_ptr<AllocationType[]> allocations, std::size_t num,
+                             VkDevice device, PoolType pool, const DeviceDispatch& dld) noexcept
+        : allocations{std::move(allocations)}, num{num}, device{device}, pool{pool}, dld{&dld} {}
+
+    /// Copying Vulkan allocations is not supported and will never be.
+    PoolAllocations(const PoolAllocations&) = delete;
+    PoolAllocations& operator=(const PoolAllocations&) = delete;
+
+    /// Construct an allocation transferring ownership from another allocation.
+    PoolAllocations(PoolAllocations&& rhs) noexcept
+        : allocations{std::move(rhs.allocations)}, num{rhs.num}, device{rhs.device}, pool{rhs.pool},
+          dld{rhs.dld} {}
+
+    /// Assign an allocation transferring ownership from another allocation.
+    /// Releases any previously held allocation. Self-assignment leaves no allocation held.
+    PoolAllocations& operator=(PoolAllocations&& rhs) noexcept {
+        Release();
+        allocations = std::move(rhs.allocations);
+        num = rhs.num;
+        device = rhs.device;
+        pool = rhs.pool;
+        dld = rhs.dld;
+        return *this;
+    }
+
+    /// Destroys any held allocation.
+    ~PoolAllocations() {
+        Release();
+    }
+
+    /// Returns the number of allocations.
+    std::size_t size() const noexcept {
+        return num;
+    }
+
+    /// Returns a pointer to the array of allocations.
+    AllocationType const* data() const noexcept {
+        return allocations.get();
+    }
+
+    /// Returns the allocation in the specified index.
+    /// @pre index < size()
+    AllocationType operator[](std::size_t index) const noexcept {
+        return allocations[index];
+    }
+
+    /// True when a pool fails to construct.
+    bool IsOutOfPoolMemory() const noexcept {
+        return !device;
+    }
+
+private:
+    /// Frees the held allocations if they exist and detaches them from this object.
+    void Release() noexcept {
+        if (!allocations) {
+            return;
+        }
+        const auto owned = std::move(allocations); // Detach first: guards self move-assignment.
+        const VkResult result = Free(device, pool, Span<AllocationType>(owned.get(), num), *dld);
+        // There's no way to report errors from a destructor.
+        if (result != VK_SUCCESS) {
+            std::terminate();
+        }
+    }
+
+    std::unique_ptr<AllocationType[]> allocations;
+    std::size_t num = 0;
+    VkDevice device = nullptr;
+    PoolType pool = nullptr;
+    const DeviceDispatch* dld = nullptr;
+};
+
+using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>;
+using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
+using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;
+using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>;
+using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>;
+using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>;
+using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;
+using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
+using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
+using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;
+using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>;
+using Semaphore = Handle<VkSemaphore, VkDevice, DeviceDispatch>;
+using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;
+using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
+
+using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>;
+using CommandBuffers = PoolAllocations<VkCommandBuffer, VkCommandPool>;
+
+/// Vulkan instance owning handle.
+class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> {
+    using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle; // Inherit the base constructors.
+
+public:
+    /// Creates a Vulkan instance. Use "operator bool" for error handling.
+    static Instance Create(Span<const char*> layers, Span<const char*> extensions,
+                           InstanceDispatch& dld) noexcept;
+
+    /// Enumerates physical devices.
+    /// @return Physical devices, or an empty optional on failure.
+    std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices();
+
+    /// Tries to create a debug callback messenger. Returns an empty handle on failure.
+    DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept;
+};
+
+class Queue { // Non-owning pairing of a VkQueue with its dispatch table (no destructor).
+public:
+    /// Construct an empty queue handle.
+    constexpr Queue() noexcept = default;
+
+    /// Construct a queue handle.
+    constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
+
+    /// Returns the checkpoint data. NOTE(review): the `dld` parameter shadows the member.
+    /// @note Returns an empty vector when the function pointer is not present.
+    std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const;
+
+    void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const {
+        Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence));
+    }
+
+    VkResult Present(const VkPresentInfoKHR& present_info) const noexcept {
+        return dld->vkQueuePresentKHR(queue, &present_info); // Raw VkResult goes to the caller.
+    }
+
+private:
+    VkQueue queue = nullptr;
+    const DeviceDispatch* dld = nullptr;
+};
+
+class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> { // Owning VkBuffer handle.
+    using Handle<VkBuffer, VkDevice, DeviceDispatch>::Handle; // Inherit the base constructors.
+
+public:
+    /// Attaches a memory allocation.
+    void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
+};
+
+class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { // Owning VkImage handle.
+    using Handle<VkImage, VkDevice, DeviceDispatch>::Handle; // Inherit the base constructors.
+
+public:
+    /// Attaches a memory allocation.
+    void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
+};
+
+class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
+    using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; // Inherit constructors.
+
+public:
+    u8* Map(VkDeviceSize offset, VkDeviceSize size) const {
+        void* data; // Output of the vkMapMemory call below.
+        Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); // 0: no map flags.
+        return static_cast<u8*>(data);
+    }
+
+    void Unmap() const noexcept {
+        dld->vkUnmapMemory(owner, handle);
+    }
+};
+
+class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> { // Owning VkFence handle.
+    using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; // Inherit the base constructors.
+
+public:
+    VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept {
+        return dld->vkWaitForFences(owner, 1, &handle, true, timeout); // Single fence, waitAll.
+    }
+
+    VkResult GetStatus() const noexcept {
+        return dld->vkGetFenceStatus(owner, handle); // Raw VkResult goes to the caller.
+    }
+
+    void Reset() const {
+        Check(dld->vkResetFences(owner, 1, &handle)); // Resets only this fence.
+    }
+};
+
+class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> {
+    using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; // Inherit constructors.
+
+public:
+    DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const;
+};
+
+class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
+    using Handle<VkCommandPool, VkDevice, DeviceDispatch>::Handle; // Inherit constructors.
+
+public:
+    CommandBuffers Allocate(std::size_t num_buffers, // Level defaults to primary buffers.
+                            VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const;
+};
+
+class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> {
+    using Handle<VkSwapchainKHR, VkDevice, DeviceDispatch>::Handle; // Inherit constructors.
+
+public:
+    std::vector<VkImage> GetImages() const;
+};
+
+class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { // Owner-less VkDevice handle.
+    using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; // Inherit the base constructors.
+
+public:
+    static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
+                         Span<const char*> enabled_extensions,
+                         const VkPhysicalDeviceFeatures2& enabled_features,
+                         DeviceDispatch& dld) noexcept;
+
+    Queue GetQueue(u32 family_index) const noexcept;
+
+    Buffer CreateBuffer(const VkBufferCreateInfo& ci) const;
+
+    BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const;
+
+    Image CreateImage(const VkImageCreateInfo& ci) const;
+
+    ImageView CreateImageView(const VkImageViewCreateInfo& ci) const;
+
+    Semaphore CreateSemaphore() const;
+
+    Fence CreateFence(const VkFenceCreateInfo& ci) const;
+
+    DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const;
+
+    RenderPass CreateRenderPass(const VkRenderPassCreateInfo& ci) const;
+
+    DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const;
+
+    PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const;
+
+    Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const;
+
+    Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const;
+
+    Sampler CreateSampler(const VkSamplerCreateInfo& ci) const;
+
+    Framebuffer CreateFramebuffer(const VkFramebufferCreateInfo& ci) const;
+
+    CommandPool CreateCommandPool(const VkCommandPoolCreateInfo& ci) const;
+
+    DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR(
+        const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const;
+
+    QueryPool CreateQueryPool(const VkQueryPoolCreateInfo& ci) const;
+
+    ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
+
+    SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
+
+    DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
+
+    DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
+
+    VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept;
+
+    VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
+
+    void UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes,
+                              Span<VkCopyDescriptorSet> copies) const noexcept;
+
+    void UpdateDescriptorSet(VkDescriptorSet set, VkDescriptorUpdateTemplateKHR update_template,
+                             const void* data) const noexcept {
+        dld->vkUpdateDescriptorSetWithTemplateKHR(handle, set, update_template, data);
+    }
+
+    VkResult AcquireNextImageKHR(VkSwapchainKHR swapchain, u64 timeout, VkSemaphore semaphore,
+                                 VkFence fence, u32* image_index) const noexcept {
+        return dld->vkAcquireNextImageKHR(handle, swapchain, timeout, semaphore, fence,
+                                          image_index); // Raw VkResult goes to the caller.
+    }
+
+    VkResult WaitIdle() const noexcept {
+        return dld->vkDeviceWaitIdle(handle);
+    }
+
+    void ResetQueryPoolEXT(VkQueryPool query_pool, u32 first, u32 count) const noexcept {
+        dld->vkResetQueryPoolEXT(handle, query_pool, first, count);
+    }
+
+    void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
+                         void* data, VkDeviceSize stride, VkQueryResultFlags flags) const {
+        Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
+                                         flags)); // The VkResult is handed to Check().
+    }
+
+    template <typename T>
+    T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const {
+        static_assert(std::is_trivially_copyable_v<T>); // T is filled through a void pointer.
+        T value; // Written by GetQueryResults below.
+        GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags); // One query.
+        return value;
+    }
+};
+
+class PhysicalDevice { // Non-owning VkPhysicalDevice wrapper; no destructor is declared.
+public:
+    constexpr PhysicalDevice() noexcept = default;
+
+    constexpr PhysicalDevice(VkPhysicalDevice physical_device, const InstanceDispatch& dld) noexcept
+        : physical_device{physical_device}, dld{&dld} {}
+
+    constexpr operator VkPhysicalDevice() const noexcept { // Implicit conversion to raw handle.
+        return physical_device;
+    }
+
+    VkPhysicalDeviceProperties GetProperties() const noexcept;
+
+    void GetProperties2KHR(VkPhysicalDeviceProperties2KHR&) const noexcept;
+
+    VkPhysicalDeviceFeatures GetFeatures() const noexcept;
+
+    void GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR&) const noexcept;
+
+    VkFormatProperties GetFormatProperties(VkFormat) const noexcept;
+
+    std::vector<VkExtensionProperties> EnumerateDeviceExtensionProperties() const;
+
+    std::vector<VkQueueFamilyProperties> GetQueueFamilyProperties() const;
+
+    bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
+
+    VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept;
+
+    std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
+
+    std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const;
+
+    VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept;
+
+private:
+    VkPhysicalDevice physical_device = nullptr;
+    const InstanceDispatch* dld = nullptr; // Instance-level dispatch table, not owned.
+};
+
+class CommandBuffer { // Non-owning VkCommandBuffer wrapper; nothing is freed on destruction.
+public:
+    CommandBuffer() noexcept = default; // Yields a null handle and a null dispatch table.
+
+    explicit CommandBuffer(VkCommandBuffer handle, const DeviceDispatch& dld) noexcept
+        : handle{handle}, dld{&dld} {}
+
+    const VkCommandBuffer* address() const noexcept {
+        return &handle;
+    }
+
+    void Begin(const VkCommandBufferBeginInfo& begin_info) const {
+        Check(dld->vkBeginCommandBuffer(handle, &begin_info));
+    }
+
+    void End() const {
+        Check(dld->vkEndCommandBuffer(handle));
+    }
+
+    void BeginRenderPass(const VkRenderPassBeginInfo& renderpass_bi,
+                         VkSubpassContents contents) const noexcept {
+        dld->vkCmdBeginRenderPass(handle, &renderpass_bi, contents);
+    }
+
+    void EndRenderPass() const noexcept {
+        dld->vkCmdEndRenderPass(handle);
+    }
+
+    void BeginQuery(VkQueryPool query_pool, u32 query, VkQueryControlFlags flags) const noexcept {
+        dld->vkCmdBeginQuery(handle, query_pool, query, flags);
+    }
+
+    void EndQuery(VkQueryPool query_pool, u32 query) const noexcept {
+        dld->vkCmdEndQuery(handle, query_pool, query);
+    }
+
+    void BindDescriptorSets(VkPipelineBindPoint bind_point, VkPipelineLayout layout, u32 first,
+                            Span<VkDescriptorSet> sets, Span<u32> dynamic_offsets) const noexcept {
+        dld->vkCmdBindDescriptorSets(handle, bind_point, layout, first, sets.size(), sets.data(),
+                                     dynamic_offsets.size(), dynamic_offsets.data());
+    }
+
+    void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept {
+        dld->vkCmdBindPipeline(handle, bind_point, pipeline);
+    }
+
+    void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const
+        noexcept {
+        dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type);
+    }
+
+    void BindVertexBuffers(u32 first, u32 count, const VkBuffer* buffers,
+                           const VkDeviceSize* offsets) const noexcept {
+        dld->vkCmdBindVertexBuffers(handle, first, count, buffers, offsets);
+    }
+
+    void BindVertexBuffer(u32 binding, VkBuffer buffer, VkDeviceSize offset) const noexcept {
+        BindVertexBuffers(binding, 1, &buffer, &offset); // Single-binding convenience overload.
+    }
+
+    void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const
+        noexcept {
+        dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance);
+    }
+
+    void DrawIndexed(u32 index_count, u32 instance_count, u32 first_index, u32 vertex_offset,
+                     u32 first_instance) const noexcept {
+        dld->vkCmdDrawIndexed(handle, index_count, instance_count, first_index, vertex_offset,
+                              first_instance); // NOTE(review): Vulkan's vertexOffset is signed.
+    }
+
+    void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const
+        noexcept {
+        dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
+                                   rects.data());
+    }
+
+    void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
+                   VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const
+        noexcept {
+        dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
+                            regions.data(), filter);
+    }
+
+    void Dispatch(u32 x, u32 y, u32 z) const noexcept {
+        dld->vkCmdDispatch(handle, x, y, z);
+    }
+
+    void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
+                         VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers,
+                         Span<VkBufferMemoryBarrier> buffer_barriers,
+                         Span<VkImageMemoryBarrier> image_barriers) const noexcept {
+        dld->vkCmdPipelineBarrier(handle, src_stage_mask, dst_stage_mask, dependency_flags,
+                                  memory_barriers.size(), memory_barriers.data(),
+                                  buffer_barriers.size(), buffer_barriers.data(),
+                                  image_barriers.size(), image_barriers.data());
+    }
+
+    void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout,
+                           Span<VkBufferImageCopy> regions) const noexcept {
+        dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(),
+                                    regions.data());
+    }
+
+    void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const
+        noexcept {
+        dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data());
+    }
+
+    void CopyImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
+                   VkImageLayout dst_layout, Span<VkImageCopy> regions) const noexcept {
+        dld->vkCmdCopyImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
+                            regions.data());
+    }
+
+    void CopyImageToBuffer(VkImage src_image, VkImageLayout src_layout, VkBuffer dst_buffer,
+                           Span<VkBufferImageCopy> regions) const noexcept {
+        dld->vkCmdCopyImageToBuffer(handle, src_image, src_layout, dst_buffer, regions.size(),
+                                    regions.data());
+    }
+
+    void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const
+        noexcept {
+        dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data);
+    }
+
+    void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, u32 offset, u32 size,
+                       const void* values) const noexcept {
+        dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
+    }
+
+    void SetCheckpointNV(const void* checkpoint_marker) const noexcept {
+        dld->vkCmdSetCheckpointNV(handle, checkpoint_marker);
+    }
+
+    void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
+        dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
+    }
+
+    void SetScissor(u32 first, Span<VkRect2D> scissors) const noexcept {
+        dld->vkCmdSetScissor(handle, first, scissors.size(), scissors.data());
+    }
+
+    void SetBlendConstants(const float blend_constants[4]) const noexcept {
+        dld->vkCmdSetBlendConstants(handle, blend_constants);
+    }
+
+    void SetStencilCompareMask(VkStencilFaceFlags face_mask, u32 compare_mask) const noexcept {
+        dld->vkCmdSetStencilCompareMask(handle, face_mask, compare_mask);
+    }
+
+    void SetStencilReference(VkStencilFaceFlags face_mask, u32 reference) const noexcept {
+        dld->vkCmdSetStencilReference(handle, face_mask, reference);
+    }
+
+    void SetStencilWriteMask(VkStencilFaceFlags face_mask, u32 write_mask) const noexcept {
+        dld->vkCmdSetStencilWriteMask(handle, face_mask, write_mask);
+    }
+
+    void SetDepthBias(float constant_factor, float clamp, float slope_factor) const noexcept {
+        dld->vkCmdSetDepthBias(handle, constant_factor, clamp, slope_factor);
+    }
+
+    void SetDepthBounds(float min_depth_bounds, float max_depth_bounds) const noexcept {
+        dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
+    }
+
+    void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
+                                         const VkDeviceSize* offsets,
+                                         const VkDeviceSize* sizes) const noexcept {
+        dld->vkCmdBindTransformFeedbackBuffersEXT(handle, first, count, buffers, offsets, sizes);
+    }
+
+    void BeginTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count,
+                                   const VkBuffer* counter_buffers,
+                                   const VkDeviceSize* counter_buffer_offsets) const noexcept {
+        dld->vkCmdBeginTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count,
+                                            counter_buffers, counter_buffer_offsets);
+    }
+
+    void EndTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count,
+                                 const VkBuffer* counter_buffers,
+                                 const VkDeviceSize* counter_buffer_offsets) const noexcept {
+        dld->vkCmdEndTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count,
+                                          counter_buffers, counter_buffer_offsets);
+    }
+
+private:
+    VkCommandBuffer handle = nullptr;    // Null until a real command buffer is supplied.
+    const DeviceDispatch* dld = nullptr; // Dispatch table used by every call above; not owned.
+};
+
+std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
+    const InstanceDispatch& dld);
+
+} // namespace Vulkan::vk
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 2fe787d6f..0f4c3103a 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -235,34 +235,30 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
     case OpCode::Id::LEA_IMM:
     case OpCode::Id::LEA_RZ:
     case OpCode::Id::LEA_HI: {
-        const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
+        auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
             switch (opcode->get().GetId()) {
             case OpCode::Id::LEA_R2: {
                 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
                         Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
             }
-
             case OpCode::Id::LEA_R1: {
                 const bool neg = instr.lea.r1.neg != 0;
                 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         GetRegister(instr.gpr20),
                         Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
             }
-
             case OpCode::Id::LEA_IMM: {
                 const bool neg = instr.lea.imm.neg != 0;
                 return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
                         GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
             }
-
             case OpCode::Id::LEA_RZ: {
                 const bool neg = instr.lea.rz.neg != 0;
                 return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
                         GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                         Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
             }
-
             case OpCode::Id::LEA_HI:
             default:
                 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
@@ -275,12 +271,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                              "Unhandled LEA Predicate");
 
-        const Node shifted_c =
-            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
-        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
-        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
-
-        SetRegister(bb, instr.gpr0, value);
+        Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c));
+        value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value));
+        SetRegister(bb, instr.gpr0, std::move(value));
 
         break;
     }
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 6ead42070..c72690b2b 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -138,18 +138,23 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
 
         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
 
-        value = [&]() {
+        value = [&] {
+            if (instr.conversion.src_size != instr.conversion.dst_size) {
+                // Rounding operations only matter when the source and destination conversion size
+                // is the same.
+                return value;
+            }
             switch (instr.conversion.f2f.GetRoundingMode()) {
             case Tegra::Shader::F2fRoundingOp::None:
                 return value;
             case Tegra::Shader::F2fRoundingOp::Round:
-                return Operation(OperationCode::FRoundEven, PRECISE, value);
+                return Operation(OperationCode::FRoundEven, value);
             case Tegra::Shader::F2fRoundingOp::Floor:
-                return Operation(OperationCode::FFloor, PRECISE, value);
+                return Operation(OperationCode::FFloor, value);
             case Tegra::Shader::F2fRoundingOp::Ceil:
-                return Operation(OperationCode::FCeil, PRECISE, value);
+                return Operation(OperationCode::FCeil, value);
             case Tegra::Shader::F2fRoundingOp::Trunc:
-                return Operation(OperationCode::FTrunc, PRECISE, value);
+                return Operation(OperationCode::FTrunc, value);
             default:
                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                                   static_cast<u32>(instr.conversion.f2f.rounding.Value()));
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index d2fe4ec5d..0dd7a1196 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -13,13 +13,247 @@
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/textures/texture.h"
 
 namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
+using Tegra::Shader::StoreType;
+using Tegra::Texture::ComponentType;
+using Tegra::Texture::TextureFormat;
+using Tegra::Texture::TICEntry;
 
 namespace {
+/// Selects the descriptor's per-channel type for `component`, following the format's order.
+ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
+                               std::size_t component) {
+    const TextureFormat format{descriptor.format};
+    switch (format) {
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1: // components 0..3 map to R, G, B, A
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.b_type;
+        }
+        if (component == 3) {
+            return descriptor.a_type;
+        }
+        break;
+    case TextureFormat::A8R8G8B8: // components 0..3 map to A, R, G, B
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.b_type;
+        }
+        break;
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5: // components 0..3 map to A, B, G, R
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::R32_B24G8: // components 0..2 map to R, B, G
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        break;
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5: // components 0..2 map to B, G, R
+        if (component == 0) {
+            return descriptor.b_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4: // components 0..1 map to G, R
+        if (component == 0) {
+            return descriptor.g_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        break;
+    }
+    UNIMPLEMENTED_MSG("texture format not implement={}", format);
+    return ComponentType::FLOAT; // fallback for unlisted formats or component indices
+}
+/// Returns whether bit `component` (0 = R ... 3 = A) is set in `component_mask` (0-15).
+bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
+    constexpr u8 R = 0b0001;
+    constexpr u8 G = 0b0010;
+    constexpr u8 B = 0b0100;
+    constexpr u8 A = 0b1000;
+    constexpr std::array<u8, 16> mask = {
+        0,   (R),     (G),     (R | G),     (B),     (R | B),     (G | B),     (R | G | B),
+        (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
+    return std::bitset<4>{mask.at(component_mask)}.test(component); // at()/test() bounds-check
+}
+/// Returns the bit width of `component` in `format`; unhandled formats report and return 0.
+u32 GetComponentSize(TextureFormat format, std::size_t component) {
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+        return 32; // same width whatever the component index
+    case TextureFormat::R16_G16_B16_A16:
+        return 16;
+    case TextureFormat::R32_G32_B32:
+        return component <= 2 ? 32 : 0;
+    case TextureFormat::R32_G32:
+        return component <= 1 ? 32 : 0;
+    case TextureFormat::R16_G16:
+        return component <= 1 ? 16 : 0;
+    case TextureFormat::R32:
+        return component == 0 ? 32 : 0;
+    case TextureFormat::R16:
+        return component == 0 ? 16 : 0;
+    case TextureFormat::R8:
+        return component == 0 ? 8 : 0;
+    case TextureFormat::R1:
+        return component == 0 ? 1 : 0;
+    case TextureFormat::A8R8G8B8:
+        return 8;
+    case TextureFormat::A2B10G10R10:
+        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
+    case TextureFormat::A4B4G4R4:
+        return 4;
+    case TextureFormat::A5B5G5R1:
+        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
+    case TextureFormat::A1B5G5R5:
+        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
+    case TextureFormat::R32_B24G8:
+        if (component == 0) {
+            return 32;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        if (component == 2) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::B5G6R5:
+        if (component == 0 || component == 2) {
+            return 5;
+        }
+        if (component == 1) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::B6G5R5:
+        if (component == 1 || component == 2) {
+            return 5;
+        }
+        if (component == 0) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::G8R24:
+        if (component == 0) {
+            return 8;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        return 0;
+    case TextureFormat::G24R8: // NOTE(review): same sizes as G8R24 above; confirm 8/24 order
+        if (component == 0) {
+            return 8;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        return 0;
+    case TextureFormat::G8R8:
+        return (component == 0 || component == 1) ? 8 : 0;
+    case TextureFormat::G4R4:
+        return (component == 0 || component == 1) ? 4 : 0;
+    default:
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return 0;
+    }
+}
+/// Returns the components present in `format` as a mask with R = bit 0 through A = bit 3.
+std::size_t GetImageComponentMask(TextureFormat format) {
+    constexpr u8 R = 0b0001;
+    constexpr u8 G = 0b0010;
+    constexpr u8 B = 0b0100;
+    constexpr u8 A = 0b1000;
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::A8R8G8B8:
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5:
+        return std::size_t{R | G | B | A};
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_B24G8:
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5:
+        return std::size_t{R | G | B};
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4:
+        return std::size_t{R | G};
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1:
+        return std::size_t{R};
+    default:
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return std::size_t{R | G | B | A}; // unhandled formats are treated as four-component
+    }
+}
+
 std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
     switch (image_type) {
     case Tegra::Shader::ImageType::Texture1D:
@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
 }
 } // Anonymous namespace
 
+/// Converts a loaded image component for packing; the flag is the is_signed value to use.
+std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
+                                                  Node original_value) {
+    switch (component_type) {
+    case ComponentType::SNORM: {
+        // range [-1.0, 1.0]; shift in 64 bits so a 32-bit component size is not undefined
+        const float scale = static_cast<float>(1ULL << component_size) / 2.f - 1.f;
+        auto cnv_value = Operation(OperationCode::FMul, original_value, Immediate(scale));
+        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
+        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
+    }
+    case ComponentType::SINT:
+    case ComponentType::UNORM: {
+        const bool is_signed = component_type == ComponentType::SINT;
+        // range [0.0, 1.0]; shift in 64 bits so a 32-bit component size is not undefined
+        const float scale = static_cast<float>(1ULL << component_size) - 1.f;
+        auto cnv_value = Operation(OperationCode::FMul, original_value, Immediate(scale));
+        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
+                is_signed};
+    }
+    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
+        return {std::move(original_value), false};
+    case ComponentType::FLOAT:
+        if (component_size == 16) {
+            return {Operation(OperationCode::HCastFloat, original_value), true};
+        }
+        return {std::move(original_value), true};
+    default:
+        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+        return {std::move(original_value), true};
+    }
+}
+
 u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
 
     switch (opcode->get().GetId()) {
     case OpCode::Id::SULD: {
-        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
         UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                          Tegra::Shader::OutOfBoundsStore::Ignore);
 
@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                                               : GetBindlessImage(instr.gpr39, type)};
         image.MarkRead();
 
-        u32 indexer = 0;
-        for (u32 element = 0; element < 4; ++element) {
-            if (!instr.suldst.IsComponentEnabled(element)) {
-                continue;
+        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
+            u32 indexer = 0;
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.suldst.IsComponentEnabled(element)) {
+                    continue;
+                }
+                MetaImage meta{image, {}, element};
+                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
+                SetTemporary(bb, indexer++, std::move(value));
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
+            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
+                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
+
+            auto descriptor = [this, instr] {
+                std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
+                if (instr.suldst.is_immediate) {
+                    descriptor =
+                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
+                } else {
+                    const Node image_register = GetRegister(instr.gpr39);
+                    const auto [base_image, buffer, offset] = TrackCbuf(
+                        image_register, global_code, static_cast<s64>(global_code.size()));
+                    descriptor = registry.ObtainBindlessSampler(buffer, offset);
+                }
+                if (!descriptor) {
+                    UNREACHABLE_MSG("Failed to obtain image descriptor");
+                }
+                return *descriptor;
+            }();
+
+            const auto comp_mask = GetImageComponentMask(descriptor.format);
+
+            switch (instr.suldst.GetStoreDataLayout()) {
+            case StoreType::Bits32:
+            case StoreType::Bits64: {
+                u32 indexer = 0;
+                u32 shifted_counter = 0;
+                Node value = Immediate(0);
+                for (u32 element = 0; element < 4; ++element) {
+                    if (!IsComponentEnabled(comp_mask, element)) {
+                        continue;
+                    }
+                    const auto component_type = GetComponentType(descriptor, element);
+                    const auto component_size = GetComponentSize(descriptor.format, element);
+                    MetaImage meta{image, {}, element};
+
+                    auto [converted_value, is_signed] = GetComponentValue(
+                        component_type, component_size,
+                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
+
+                    // shift element to correct position
+                    const auto shifted = shifted_counter;
+                    if (shifted > 0) {
+                        converted_value =
+                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
+                                            std::move(converted_value), Immediate(shifted));
+                    }
+                    shifted_counter += component_size;
+
+                    // add value into result
+                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
+
+                    // once a full 32-bit word has been packed, store it in a temporary
+                    if (shifted_counter >= 32) {
+                        SetTemporary(bb, indexer++, std::move(value));
+                        // reset counter and value to start packing the next 32-bit word
+                        value = Immediate(0);
+                        shifted_counter = 0;
+                    }
+                }
+                for (u32 i = 0; i < indexer; ++i) {
+                    SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+                }
+                break;
+            }
+            default:
+                UNREACHABLE();
+                break;
             }
-            MetaImage meta{image, {}, element};
-            Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
-            SetTemporary(bb, indexer++, std::move(value));
-        }
-        for (u32 i = 0; i < indexer; ++i) {
-            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
         }
         break;
     }
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index b5fbc4d58..b8f63922f 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -19,7 +19,6 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
-using Tegra::Shader::GlobalAtomicOp;
 using Tegra::Shader::GlobalAtomicType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
@@ -28,6 +27,31 @@ using Tegra::Shader::StoreType;
 
 namespace {
 
+Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) {
+    const OperationCode operation_code = [op] { // pick the signed IR opcode for this op
+        switch (op) {
+        case AtomicOp::Add:
+            return OperationCode::AtomicIAdd;
+        case AtomicOp::Min:
+            return OperationCode::AtomicIMin;
+        case AtomicOp::Max:
+            return OperationCode::AtomicIMax;
+        case AtomicOp::And:
+            return OperationCode::AtomicIAnd;
+        case AtomicOp::Or:
+            return OperationCode::AtomicIOr;
+        case AtomicOp::Xor:
+            return OperationCode::AtomicIXor;
+        case AtomicOp::Exch:
+            return OperationCode::AtomicIExchange;
+        default:
+            UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
+            return OperationCode::AtomicIAdd; // fallback once the op has been reported
+        }
+    }(); // is_signed is handed to SignedOperation below
+    return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
+}
+
 bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
     return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
            uniform_type == Tegra::Shader::UniformType::UnsignedShort;
@@ -363,10 +387,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::ATOM: {
-        UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
-                             static_cast<int>(instr.atom.operation.Value()));
-        UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
-                             static_cast<int>(instr.atom.type.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
+                                 instr.atom.operation == AtomicOp::Dec ||
+                                 instr.atom.operation == AtomicOp::SafeAdd,
+                             "operation={}", static_cast<int>(instr.atom.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
+                                 instr.atom.type == GlobalAtomicType::U64,
+                             "type={}", static_cast<int>(instr.atom.type.Value()));
 
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
@@ -375,25 +402,29 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             break;
         }
 
+        const bool is_signed =
+            instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-        Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
+        Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
+                                      GetRegister(instr.gpr20));
         SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
     case OpCode::Id::ATOMS: {
-        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
-                             static_cast<int>(instr.atoms.operation.Value()));
-        UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
-                             static_cast<int>(instr.atoms.type.Value()));
-
+        UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
+                                 instr.atoms.operation == AtomicOp::Dec,
+                             "operation={}", static_cast<int>(instr.atoms.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
+                                 instr.atoms.type == AtomicType::U64,
+                             "type={}", static_cast<int>(instr.atoms.type.Value()));
+        const bool is_signed =
+            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
         const s32 offset = instr.atoms.GetImmediateOffset();
         Node address = GetRegister(instr.gpr8);
         address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
-
-        Node memory = GetSharedMemory(std::move(address));
-        Node data = GetRegister(instr.gpr20);
-
-        Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
+        Node value =
+            GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
+                             GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
         SetRegister(bb, instr.gpr0, std::move(value));
         break;
     }
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 4944e9d69..d4f95b18c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -11,12 +11,17 @@
 
 namespace VideoCommon::Shader {
 
+using std::move;
 using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Instruction;
+using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::PixelImap;
 using Tegra::Shader::Register;
 using Tegra::Shader::SystemVariable;
 
+using Index = Tegra::Shader::Attribute::Index;
+
 u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         bb.push_back(Operation(OperationCode::Discard));
         break;
     }
-    case OpCode::Id::MOV_SYS: {
+    case OpCode::Id::S2R: {
         const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::LaneId:
-                LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
+                LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
                 return Immediate(0U);
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
                 return Operation(OperationCode::YNegate);
             case SystemVariable::InvocationInfo:
-                LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+                LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorXY:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorZ:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
                 return Immediate(0U);
             case SystemVariable::Tid: {
                 Node value = Immediate(0);
@@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     }
     case OpCode::Id::IPA: {
         const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
-
         const auto attribute = instr.attribute.fmt28;
-        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
-                                                instr.ipa.sample_mode.Value()};
+        const Index index = attribute.index;
 
         Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
-                                 : GetInputAttribute(attribute.index, attribute.element);
-        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
-        const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
-                                index <= Tegra::Shader::Attribute::Index::Attribute_31;
-        if (is_generic || is_physical) {
-            // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
-            // In theory by setting them as perspective, OpenGL does the perspective correction.
-            // A way must figured to reverse the last step of it.
-            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
-                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
+                                 : GetInputAttribute(index, attribute.element);
+
+        // Code taken from Ryujinx.
+        if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
+            const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
+            if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
+                Node position_w = GetInputAttribute(Index::Position, 3);
+                value = Operation(OperationCode::FMul, move(value), move(position_w));
             }
         }
-        value = GetSaturatedFloat(value, instr.ipa.saturate);
 
-        SetRegister(bb, instr.gpr0, value);
+        if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
+            value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
+        }
+
+        value = GetSaturatedFloat(move(value), instr.ipa.saturate);
+
+        SetRegister(bb, instr.gpr0, move(value));
         break;
     }
     case OpCode::Id::OUT_R: {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a1828546e..5fcc9da60 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,7 +162,21 @@ enum class OperationCode {
     AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
     AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
 
-    AtomicAdd, /// (memory, {u}int) -> {u}int
+    AtomicUExchange, /// (memory, uint) -> uint
+    AtomicUAdd,      /// (memory, uint) -> uint
+    AtomicUMin,      /// (memory, uint) -> uint
+    AtomicUMax,      /// (memory, uint) -> uint
+    AtomicUAnd,      /// (memory, uint) -> uint
+    AtomicUOr,       /// (memory, uint) -> uint
+    AtomicUXor,      /// (memory, uint) -> uint
+
+    AtomicIExchange, /// (memory, int) -> int
+    AtomicIAdd,      /// (memory, int) -> int
+    AtomicIMin,      /// (memory, int) -> int
+    AtomicIMax,      /// (memory, int) -> int
+    AtomicIAnd,      /// (memory, int) -> int
+    AtomicIOr,       /// (memory, int) -> int
+    AtomicIXor,      /// (memory, int) -> int
 
     Branch,         /// (uint branch_target) -> void
     BranchIndirect, /// (uint branch_target) -> void
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 76c56abb5..7bf4ff387 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -86,6 +86,20 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
         return OperationCode::LogicalUNotEqual;
     case OperationCode::LogicalIGreaterEqual:
         return OperationCode::LogicalUGreaterEqual;
+    case OperationCode::AtomicIExchange:
+        return OperationCode::AtomicUExchange;
+    case OperationCode::AtomicIAdd:
+        return OperationCode::AtomicUAdd;
+    case OperationCode::AtomicIMin:
+        return OperationCode::AtomicUMin;
+    case OperationCode::AtomicIMax:
+        return OperationCode::AtomicUMax;
+    case OperationCode::AtomicIAnd:
+        return OperationCode::AtomicUAnd;
+    case OperationCode::AtomicIOr:
+        return OperationCode::AtomicUOr;
+    case OperationCode::AtomicIXor:
+        return OperationCode::AtomicUXor;
     case OperationCode::INegate:
         UNREACHABLE_MSG("Can't negate an unsigned integer");
         return {};
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index baf7188d2..8852c8a1b 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
     switch (cc) {
     case Tegra::Shader::ConditionCode::NEU:
         return GetInternalFlag(InternalFlag::Zero, true);
+    case Tegra::Shader::ConditionCode::FCSM_TR:
+        UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
+        return MakeNode<PredicateNode>(Pred::NeverExecute, false);
     default:
         UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
         return MakeNode<PredicateNode>(Pred::NeverExecute, false);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0f1ebef1b..c6e7bdf50 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -312,6 +312,10 @@ private:
     /// Conditionally saturates a half float pair
     Node GetSaturatedHalfFloat(Node value, bool saturate = true);
 
+    /// Get image component value by type and size
+    std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
+                                            u32 component_size, Node original_value);
+
     /// Returns a predicate comparing two floats
     Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
     /// Returns a predicate comparing two integers
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index ae8817465..e0acd44d3 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -504,103 +504,6 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
     return GetFormatBpp(pixel_format) / CHAR_BIT;
 }
 
-enum class SurfaceCompression {
-    None,       // Not compressed
-    Compressed, // Texture is compressed
-    Converted,  // Texture is converted before upload or after download
-    Rearranged, // Texture is swizzled before upload or after download
-};
-
-constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{
-    SurfaceCompression::None,       // ABGR8U
-    SurfaceCompression::None,       // ABGR8S
-    SurfaceCompression::None,       // ABGR8UI
-    SurfaceCompression::None,       // B5G6R5U
-    SurfaceCompression::None,       // A2B10G10R10U
-    SurfaceCompression::None,       // A1B5G5R5U
-    SurfaceCompression::None,       // R8U
-    SurfaceCompression::None,       // R8UI
-    SurfaceCompression::None,       // RGBA16F
-    SurfaceCompression::None,       // RGBA16U
-    SurfaceCompression::None,       // RGBA16S
-    SurfaceCompression::None,       // RGBA16UI
-    SurfaceCompression::None,       // R11FG11FB10F
-    SurfaceCompression::None,       // RGBA32UI
-    SurfaceCompression::Compressed, // DXT1
-    SurfaceCompression::Compressed, // DXT23
-    SurfaceCompression::Compressed, // DXT45
-    SurfaceCompression::Compressed, // DXN1
-    SurfaceCompression::Compressed, // DXN2UNORM
-    SurfaceCompression::Compressed, // DXN2SNORM
-    SurfaceCompression::Compressed, // BC7U
-    SurfaceCompression::Compressed, // BC6H_UF16
-    SurfaceCompression::Compressed, // BC6H_SF16
-    SurfaceCompression::Converted,  // ASTC_2D_4X4
-    SurfaceCompression::None,       // BGRA8
-    SurfaceCompression::None,       // RGBA32F
-    SurfaceCompression::None,       // RG32F
-    SurfaceCompression::None,       // R32F
-    SurfaceCompression::None,       // R16F
-    SurfaceCompression::None,       // R16U
-    SurfaceCompression::None,       // R16S
-    SurfaceCompression::None,       // R16UI
-    SurfaceCompression::None,       // R16I
-    SurfaceCompression::None,       // RG16
-    SurfaceCompression::None,       // RG16F
-    SurfaceCompression::None,       // RG16UI
-    SurfaceCompression::None,       // RG16I
-    SurfaceCompression::None,       // RG16S
-    SurfaceCompression::None,       // RGB32F
-    SurfaceCompression::None,       // RGBA8_SRGB
-    SurfaceCompression::None,       // RG8U
-    SurfaceCompression::None,       // RG8S
-    SurfaceCompression::None,       // RG32UI
-    SurfaceCompression::None,       // RGBX16F
-    SurfaceCompression::None,       // R32UI
-    SurfaceCompression::None,       // R32I
-    SurfaceCompression::Converted,  // ASTC_2D_8X8
-    SurfaceCompression::Converted,  // ASTC_2D_8X5
-    SurfaceCompression::Converted,  // ASTC_2D_5X4
-    SurfaceCompression::None,       // BGRA8_SRGB
-    SurfaceCompression::Compressed, // DXT1_SRGB
-    SurfaceCompression::Compressed, // DXT23_SRGB
-    SurfaceCompression::Compressed, // DXT45_SRGB
-    SurfaceCompression::Compressed, // BC7U_SRGB
-    SurfaceCompression::None,       // R4G4B4A4U
-    SurfaceCompression::Converted,  // ASTC_2D_4X4_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_8X8_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_8X5_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_5X4_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_5X5
-    SurfaceCompression::Converted,  // ASTC_2D_5X5_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_10X8
-    SurfaceCompression::Converted,  // ASTC_2D_10X8_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_6X6
-    SurfaceCompression::Converted,  // ASTC_2D_6X6_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_10X10
-    SurfaceCompression::Converted,  // ASTC_2D_10X10_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_12X12
-    SurfaceCompression::Converted,  // ASTC_2D_12X12_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_8X6
-    SurfaceCompression::Converted,  // ASTC_2D_8X6_SRGB
-    SurfaceCompression::Converted,  // ASTC_2D_6X5
-    SurfaceCompression::Converted,  // ASTC_2D_6X5_SRGB
-    SurfaceCompression::None,       // E5B9G9R9F
-    SurfaceCompression::None,       // Z32F
-    SurfaceCompression::None,       // Z16
-    SurfaceCompression::None,       // Z24S8
-    SurfaceCompression::Rearranged, // S8Z24
-    SurfaceCompression::None,       // Z32FS8
-}};
-
-constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) {
-    if (format == PixelFormat::Invalid) {
-        return SurfaceCompression::None;
-    }
-    DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size());
-    return compression_type_table[static_cast<std::size_t>(format)];
-}
-
 SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
 
 bool SurfaceTargetIsLayered(SurfaceTarget target);
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 002df414f..7af0e792c 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -18,15 +18,20 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192,
 
 using Tegra::Texture::ConvertFromGuestToHost;
 using VideoCore::MortonSwizzleMode;
-using VideoCore::Surface::SurfaceCompression;
+using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::PixelFormat;
 
 StagingCache::StagingCache() = default;
 
 StagingCache::~StagingCache() = default;
 
-SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
-    : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr},
-      mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) {
+SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
+                                 bool is_astc_supported)
+    : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
+      mipmap_offsets(params.num_levels) {
+    is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
+    host_memory_size = params.GetHostSizeInBytes(is_converted);
+
     std::size_t offset = 0;
     for (u32 level = 0; level < params.num_levels; ++level) {
         const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
@@ -164,7 +169,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf
 
     std::size_t guest_offset{mipmap_offsets[level]};
     if (params.is_layered) {
-        std::size_t host_offset{0};
+        std::size_t host_offset = 0;
         const std::size_t guest_stride = layer_size;
         const std::size_t host_stride = params.GetHostLayerSize(level);
         for (u32 layer = 0; layer < params.depth; ++layer) {
@@ -185,28 +190,17 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
     MICROPROFILE_SCOPE(GPU_Load_Texture);
     auto& staging_buffer = staging_cache.GetBuffer(0);
     u8* host_ptr;
-    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);
-
-    // Handle continuouty
-    if (is_continuous) {
-        // Use physical memory directly
-        host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
-            return;
-        }
-    } else {
-        // Use an extra temporal buffer
-        auto& tmp_buffer = staging_cache.GetBuffer(1);
-        tmp_buffer.resize(guest_memory_size);
-        host_ptr = tmp_buffer.data();
-        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
-    }
+    // Use an extra temporary buffer
+    auto& tmp_buffer = staging_cache.GetBuffer(1);
+    tmp_buffer.resize(guest_memory_size);
+    host_ptr = tmp_buffer.data();
+    memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
 
     if (params.is_tiled) {
         ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
                    params.block_width, static_cast<u32>(params.target));
         for (u32 level = 0; level < params.num_levels; ++level) {
-            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
+            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
             SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
                         staging_buffer.data() + host_offset, level);
         }
@@ -219,7 +213,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
         const u32 height{(params.height + block_height - 1) / block_height};
         const u32 copy_size{width * bpp};
         if (params.pitch == copy_size) {
-            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
+            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
         } else {
             const u8* start{host_ptr};
             u8* write_to{staging_buffer.data()};
@@ -231,19 +225,15 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
         }
     }
 
-    auto compression_type = params.GetCompressionType();
-    if (compression_type == SurfaceCompression::None ||
-        compression_type == SurfaceCompression::Compressed)
+    if (!is_converted && params.pixel_format != PixelFormat::S8Z24) {
         return;
+    }
 
-    for (u32 level_up = params.num_levels; level_up > 0; --level_up) {
-        const u32 level = level_up - 1;
-        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)};
-        const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
-                                                ? in_host_offset
-                                                : params.GetConvertedMipmapOffset(level);
-        u8* in_buffer = staging_buffer.data() + in_host_offset;
-        u8* out_buffer = staging_buffer.data() + out_host_offset;
+    for (u32 level = params.num_levels; level--;) {
+        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
+        const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
+        u8* const in_buffer = staging_buffer.data() + in_host_offset;
+        u8* const out_buffer = staging_buffer.data() + out_host_offset;
         ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
                                params.GetMipWidth(level), params.GetMipHeight(level),
                                params.GetMipDepth(level), true, true);
@@ -256,24 +246,15 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
     auto& staging_buffer = staging_cache.GetBuffer(0);
     u8* host_ptr;
 
-    // Handle continuouty
-    if (is_continuous) {
-        // Use physical memory directly
-        host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
-            return;
-        }
-    } else {
-        // Use an extra temporal buffer
-        auto& tmp_buffer = staging_cache.GetBuffer(1);
-        tmp_buffer.resize(guest_memory_size);
-        host_ptr = tmp_buffer.data();
-    }
+    // Use an extra temporary buffer
+    auto& tmp_buffer = staging_cache.GetBuffer(1);
+    tmp_buffer.resize(guest_memory_size);
+    host_ptr = tmp_buffer.data();
 
     if (params.is_tiled) {
         ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
         for (u32 level = 0; level < params.num_levels; ++level) {
-            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
+            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
             SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
                         staging_buffer.data() + host_offset, level);
         }
@@ -299,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
             }
         }
     }
-    if (!is_continuous) {
-        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
-    }
+    memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
 }
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 5f79bb0aa..a39a8661b 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -68,8 +68,8 @@ public:
         return gpu_addr;
     }
 
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
     bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
@@ -86,21 +86,13 @@ public:
         return cpu_addr;
     }
 
-    void SetCpuAddr(const VAddr new_addr) {
-        cpu_addr = new_addr;
-    }
-
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
     }
 
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
-    }
-
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + guest_memory_size;
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + guest_memory_size;
     }
 
     const SurfaceParams& GetSurfaceParams() const {
@@ -119,18 +111,14 @@ public:
         return mipmap_sizes[level];
     }
 
-    void MarkAsContinuous(const bool is_continuous) {
-        this->is_continuous = is_continuous;
-    }
-
-    bool IsContinuous() const {
-        return is_continuous;
-    }
-
     bool IsLinear() const {
         return !params.is_tiled;
     }
 
+    bool IsConverted() const {
+        return is_converted;
+    }
+
     bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
         return params.pixel_format == pixel_format;
     }
@@ -160,7 +148,8 @@ public:
     }
 
 protected:
-    explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params);
+    explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
+                             bool is_astc_supported);
     ~SurfaceBaseImpl() = default;
 
     virtual void DecorateSurfaceName() = 0;
@@ -168,12 +157,11 @@ protected:
     const SurfaceParams params;
     std::size_t layer_size;
     std::size_t guest_memory_size;
-    const std::size_t host_memory_size;
+    std::size_t host_memory_size;
     GPUVAddr gpu_addr{};
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
     VAddr cpu_addr{};
-    bool is_continuous{};
+    VAddr cpu_addr_end{};
+    bool is_converted{};
 
     std::vector<std::size_t> mipmap_sizes;
     std::vector<std::size_t> mipmap_offsets;
@@ -288,8 +276,9 @@ public:
     }
 
 protected:
-    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
-        : SurfaceBaseImpl(gpu_addr, params) {}
+    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
+                         bool is_astc_supported)
+        : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}
 
     ~SurfaceBase() = default;
 
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9931c5ef7..6f3ef45be 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
         params.height = tic.Height();
         params.depth = tic.Depth();
         params.pitch = params.is_tiled ? 0 : tic.Pitch();
-        if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
-            params.depth = 1;
-        } else if (params.target == SurfaceTarget::TextureCubemap ||
-                   params.target == SurfaceTarget::TextureCubeArray) {
+        if (params.target == SurfaceTarget::TextureCubemap ||
+            params.target == SurfaceTarget::TextureCubeArray) {
             params.depth *= 6;
         }
         params.num_levels = tic.max_mip_level + 1;
@@ -309,28 +307,26 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
     return offset;
 }
 
-std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
-    std::size_t offset = 0;
-    for (u32 i = 0; i < level; i++) {
-        offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
-    }
-    return offset;
-}
-
-std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
+std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
     std::size_t offset = 0;
-    for (u32 i = 0; i < level; i++) {
-        offset += GetConvertedMipmapSize(i);
+    if (is_converted) {
+        for (u32 i = 0; i < level; ++i) {
+            offset += GetConvertedMipmapSize(i) * GetNumLayers();
+        }
+    } else {
+        for (u32 i = 0; i < level; ++i) {
+            offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
+        }
     }
     return offset;
 }
 
 std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
     constexpr std::size_t rgba8_bpp = 4ULL;
-    const std::size_t width_t = GetMipWidth(level);
-    const std::size_t height_t = GetMipHeight(level);
-    const std::size_t depth_t = is_layered ? depth : GetMipDepth(level);
-    return width_t * height_t * depth_t * rgba8_bpp;
+    const std::size_t mip_width = GetMipWidth(level);
+    const std::size_t mip_height = GetMipHeight(level);
+    const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
+    return mip_width * mip_height * mip_depth * rgba8_bpp;
 }
 
 std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 995cc3818..24957df8d 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -20,8 +20,6 @@ namespace VideoCommon {
 
 class FormatLookupTable;
 
-using VideoCore::Surface::SurfaceCompression;
-
 class SurfaceParams {
 public:
     /// Creates SurfaceCachedParams from a texture configuration.
@@ -67,16 +65,14 @@ public:
         return GetInnerMemorySize(false, false, false);
     }
 
-    std::size_t GetHostSizeInBytes() const {
-        std::size_t host_size_in_bytes;
-        if (GetCompressionType() == SurfaceCompression::Converted) {
-            // ASTC is uncompressed in software, in emulated as RGBA8
-            host_size_in_bytes = 0;
-            for (u32 level = 0; level < num_levels; ++level) {
-                host_size_in_bytes += GetConvertedMipmapSize(level);
-            }
-        } else {
-            host_size_in_bytes = GetInnerMemorySize(true, false, false);
+    std::size_t GetHostSizeInBytes(bool is_converted) const {
+        if (!is_converted) {
+            return GetInnerMemorySize(true, false, false);
+        }
+        // ASTC is decompressed in software and emulated as RGBA8
+        std::size_t host_size_in_bytes = 0;
+        for (u32 level = 0; level < num_levels; ++level) {
+            host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
         }
         return host_size_in_bytes;
     }
@@ -107,9 +103,8 @@ public:
     u32 GetMipBlockDepth(u32 level) const;
 
     /// Returns the best possible row/pitch alignment for the surface.
-    u32 GetRowAlignment(u32 level) const {
-        const u32 bpp =
-            GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel();
+    u32 GetRowAlignment(u32 level, bool is_converted) const {
+        const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
         return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
     }
 
@@ -117,11 +112,7 @@ public:
     std::size_t GetGuestMipmapLevelOffset(u32 level) const;
 
     /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
-    std::size_t GetHostMipmapLevelOffset(u32 level) const;
-
-    /// Returns the offset in bytes in host memory (linear) of a given mipmap level
-    /// for a texture that is converted in host gpu.
-    std::size_t GetConvertedMipmapOffset(u32 level) const;
+    std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
 
     /// Returns the size in bytes in guest memory of a given mipmap level.
     std::size_t GetGuestMipmapSize(u32 level) const {
@@ -196,11 +187,6 @@ public:
                pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
     }
 
-    /// Returns how the compression should be handled for this texture.
-    SurfaceCompression GetCompressionType() const {
-        return VideoCore::Surface::GetFormatCompressionType(pixel_format);
-    }
-
     /// Returns is the surface is a TextureBuffer type of surface.
     bool IsBuffer() const {
         return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6cdbe63d0..88fe3e25f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
 
 template <typename TSurface, typename TView>
 class TextureCache {
-    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
-    using IntervalType = typename IntervalMap::interval_type;
 
 public:
-    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+    void InvalidateRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@@ -76,7 +74,7 @@ public:
         guard_samplers = new_guard;
     }
 
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         auto surfaces = GetSurfacesInRegion(addr, size);
@@ -99,9 +97,9 @@ public:
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
 
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
-        if (!cache_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
 
@@ -110,7 +108,7 @@ public:
         }
 
         const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
+        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
         if (guard_samplers) {
             sampled_textures.push_back(surface);
         }
@@ -124,13 +122,13 @@ public:
         if (!gpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
-        if (!cache_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
         const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
+        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
         if (guard_samplers) {
             sampled_textures.push_back(surface);
         }
@@ -159,14 +157,14 @@ public:
             SetEmptyDepthBuffer();
             return {};
         }
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
-        if (!cache_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             SetEmptyDepthBuffer();
             return {};
         }
         const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
-        auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
+        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
         if (depth_buffer.target)
             depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
         depth_buffer.target = surface_view.first;
@@ -199,15 +197,15 @@ public:
             return {};
         }
 
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
-        if (!cache_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             SetEmptyColorBuffer(index);
             return {};
         }
 
         auto surface_view =
-            GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index),
+            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                        preserve_contents, true);
         if (render_targets[index].target)
             render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
@@ -257,27 +255,26 @@ public:
         const GPUVAddr src_gpu_addr = src_config.Address();
         const GPUVAddr dst_gpu_addr = dst_config.Address();
         DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
-        const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)};
-        const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)};
-        const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)};
-        const auto src_cache_addr{ToCacheAddr(src_host_ptr)};
+        const std::optional<VAddr> dst_cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
+        const std::optional<VAddr> src_cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
         std::pair<TSurface, TView> dst_surface =
-            GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false);
+            GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
         std::pair<TSurface, TView> src_surface =
-            GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false);
+            GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
         ImageBlit(src_surface.second, dst_surface.second, copy_config);
         dst_surface.first->MarkAsModified(true, Tick());
     }
 
-    TSurface TryFindFramebufferSurface(const u8* host_ptr) {
-        const CacheAddr cache_addr = ToCacheAddr(host_ptr);
-        if (!cache_addr) {
+    TSurface TryFindFramebufferSurface(VAddr addr) {
+        if (!addr) {
             return nullptr;
         }
-        const CacheAddr page = cache_addr >> registry_page_bits;
+        const VAddr page = addr >> registry_page_bits;
         std::vector<TSurface>& list = registry[page];
         for (auto& surface : list) {
-            if (surface->GetCacheAddr() == cache_addr) {
+            if (surface->GetCpuAddr() == addr) {
                 return surface;
             }
         }
@@ -289,8 +286,9 @@ public:
     }
 
 protected:
-    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
-        : system{system}, rasterizer{rasterizer} {
+    explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                          bool is_astc_supported)
+        : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
         for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
             SetEmptyColorBuffer(i);
         }
@@ -337,18 +335,14 @@ protected:
 
     void Register(TSurface surface) {
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
-        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
         const std::size_t size = surface->GetSizeInBytes();
         const std::optional<VAddr> cpu_addr =
             system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
-        if (!cache_ptr || !cpu_addr) {
+        if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                          gpu_addr);
             return;
         }
-        const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
-        surface->MarkAsContinuous(continuous);
-        surface->SetCacheAddr(cache_ptr);
         surface->SetCpuAddr(*cpu_addr);
         RegisterInnerCache(surface);
         surface->MarkAsRegistered(true);
@@ -381,6 +375,7 @@ protected:
     }
 
     Core::System& system;
+    const bool is_astc_supported;
 
 private:
     enum class RecycleStrategy : u32 {
@@ -632,7 +627,7 @@ private:
     std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
                                                                const SurfaceParams& params,
                                                                const GPUVAddr gpu_addr,
-                                                               const CacheAddr cache_addr,
+                                                               const VAddr cpu_addr,
                                                                bool preserve_contents) {
         if (params.target == SurfaceTarget::Texture3D) {
             bool failed = false;
@@ -657,7 +652,7 @@ private:
                     failed = true;
                     break;
                 }
-                const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
+                const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
                 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
                 modified |= surface->IsModified();
                 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
@@ -677,7 +672,7 @@ private:
         } else {
             for (const auto& surface : overlaps) {
                 if (!surface->MatchTarget(params.target)) {
-                    if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
+                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
                         if (Settings::values.use_accurate_gpu_emulation) {
                             return std::nullopt;
                         }
@@ -686,7 +681,7 @@ private:
                     }
                     return std::nullopt;
                 }
-                if (surface->GetCacheAddr() != cache_addr) {
+                if (surface->GetCpuAddr() != cpu_addr) {
                     continue;
                 }
                 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
@@ -720,13 +715,13 @@ private:
      *                          left blank.
      * @param is_render         Whether or not the surface is a render target.
      **/
-    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
+    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
                                           const SurfaceParams& params, bool preserve_contents,
                                           bool is_render) {
         // Step 1
         // Check Level 1 Cache for a fast structural match. If candidate surface
         // matches at certain level we are pretty much done.
-        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
+        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
             TSurface& current_surface = iter->second;
             const auto topological_result = current_surface->MatchesTopology(params);
             if (topological_result != MatchTopologyResult::FullMatch) {
@@ -753,7 +748,7 @@ private:
         // Step 2
         // Obtain all possible overlaps in the memory region
         const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
+        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
 
         // If none are found, we are done. we just load the surface and create it.
         if (overlaps.empty()) {
@@ -775,7 +770,7 @@ private:
         // Check if it's a 3D texture
         if (params.block_depth > 0) {
             auto surface =
-                Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
+                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
             if (surface) {
                 return *surface;
             }
@@ -850,16 +845,16 @@ private:
      * @param params   The parameters on the candidate surface.
      **/
     Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 
-        if (!cache_addr) {
+        if (!cpu_addr) {
             Deduction result{};
             result.type = DeductionType::DeductionFailed;
             return result;
         }
 
-        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
+        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
             TSurface& current_surface = iter->second;
             const auto topological_result = current_surface->MatchesTopology(params);
             if (topological_result != MatchTopologyResult::FullMatch) {
@@ -878,7 +873,7 @@ private:
         }
 
         const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
+        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
 
         if (overlaps.empty()) {
             Deduction result{};
@@ -1022,10 +1017,10 @@ private:
     }
 
     void RegisterInnerCache(TSurface& surface) {
-        const CacheAddr cache_addr = surface->GetCacheAddr();
-        CacheAddr start = cache_addr >> registry_page_bits;
-        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
-        l1_cache[cache_addr] = surface;
+        const VAddr cpu_addr = surface->GetCpuAddr();
+        VAddr start = cpu_addr >> registry_page_bits;
+        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
+        l1_cache[cpu_addr] = surface;
         while (start <= end) {
             registry[start].push_back(surface);
             start++;
@@ -1033,10 +1028,10 @@ private:
     }
 
     void UnregisterInnerCache(TSurface& surface) {
-        const CacheAddr cache_addr = surface->GetCacheAddr();
-        CacheAddr start = cache_addr >> registry_page_bits;
-        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
-        l1_cache.erase(cache_addr);
+        const VAddr cpu_addr = surface->GetCpuAddr();
+        VAddr start = cpu_addr >> registry_page_bits;
+        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
+        l1_cache.erase(cpu_addr);
         while (start <= end) {
             auto& reg{registry[start]};
             reg.erase(std::find(reg.begin(), reg.end(), surface));
@@ -1044,18 +1039,18 @@ private:
         }
     }
 
-    std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
+    std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
         if (size == 0) {
             return {};
         }
-        const CacheAddr cache_addr_end = cache_addr + size;
-        CacheAddr start = cache_addr >> registry_page_bits;
-        const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + size;
+        VAddr start = cpu_addr >> registry_page_bits;
+        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
         std::vector<TSurface> surfaces;
         while (start <= end) {
             std::vector<TSurface>& list = registry[start];
             for (auto& surface : list) {
-                if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
+                if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
                     surface->MarkAsPicked(true);
                     surfaces.push_back(surface);
                 }
@@ -1144,14 +1139,14 @@ private:
     // large in size.
     static constexpr u64 registry_page_bits{20};
     static constexpr u64 registry_page_size{1 << registry_page_bits};
-    std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
+    std::unordered_map<VAddr, std::vector<TSurface>> registry;
 
     static constexpr u32 DEPTH_RT = 8;
     static constexpr u32 NO_RT = 0xFFFFFFFF;
 
     // The L1 Cache is used for fast texture lookup before checking the overlaps
     // This avoids calculating size and other stuffs.
-    std::unordered_map<CacheAddr, TSurface> l1_cache;
+    std::unordered_map<VAddr, TSurface> l1_cache;
 
     /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
     /// previously been used. This is to prevent surfaces from being constantly created and
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
new file mode 100644
index 000000000..d1939d744
--- /dev/null
+++ b/src/video_core/textures/texture.cpp
@@ -0,0 +1,80 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+
+#include "core/settings.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra::Texture {
+
+namespace {
+
+constexpr std::array<float, 256> SRGB_CONVERSION_LUT = { // Indexed by an 8-bit sRGB channel
+    0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
+    0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
+    0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
+    0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
+    0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
+    0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
+    0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
+    0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
+    0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
+    0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
+    0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
+    0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
+    0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
+    0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
+    0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
+    0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
+    0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
+    0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
+    0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
+    0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
+    0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
+    0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
+    0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
+    0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
+    0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
+    0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
+    0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
+    0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
+    0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
+    0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
+    0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
+    0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
+};
+
+// Lower bound for anisotropic filtering taken from Settings::values.max_anisotropy.
+unsigned SettingsMinimumAnisotropy() noexcept {
+    switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
+    case Anisotropy::Filter16x:
+        return 16U;
+    case Anisotropy::Filter8x:
+        return 8U;
+    case Anisotropy::Filter4x:
+        return 4U;
+    case Anisotropy::Filter2x:
+        return 2U;
+    default: // Anisotropy::Default and every unlisted value
+        return 1U;
+    }
+}
+
+} // Anonymous namespace
+
+std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
+    if (srgb_conversion) { // RGB looked up in the LUT, alpha taken as-is
+        return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
+                SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
+    }
+    return border_color;
+}
+
+float TSCEntry::GetMaxAnisotropy() const noexcept { // max(settings minimum, 1 << max_anisotropy)
+    return static_cast<float>(std::max(SettingsMinimumAnisotropy(), 1U << max_anisotropy));
+}
+
+} // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7edc4abe1..eba05aced 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -8,7 +8,6 @@
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_types.h"
-#include "core/settings.h"
 
 namespace Tegra::Texture {
 
@@ -132,6 +131,20 @@ enum class SwizzleSource : u32 {
     OneFloat = 7,
 };
 
+enum class MsaaMode : u32 { // Values of the 4-bit TICEntry::msaa_mode field
+    Msaa1x1 = 0,
+    Msaa2x1 = 1,
+    Msaa2x2 = 2,
+    Msaa4x2 = 3,
+    Msaa4x2_D3D = 4,
+    Msaa2x1_D3D = 5,
+    Msaa4x4 = 6,
+    Msaa2x2_VC4 = 8,
+    Msaa2x2_VC12 = 9,
+    Msaa4x2_VC8 = 10,
+    Msaa4x2_VC24 = 11,
+};
+
 union TextureHandle {
     TextureHandle(u32 raw) : raw{raw} {}
 
@@ -198,6 +211,7 @@ struct TICEntry {
     union {
         BitField<0, 4, u32> res_min_mip_level;
         BitField<4, 4, u32> res_max_mip_level;
+        BitField<8, 4, MsaaMode> msaa_mode;
         BitField<12, 12, u32> min_lod_clamp;
     };
 
@@ -336,24 +350,9 @@ struct TSCEntry {
         std::array<u8, 0x20> raw;
     };
 
-    float GetMaxAnisotropy() const {
-        const u32 min_value = [] {
-            switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
-            default:
-            case Anisotropy::Default:
-                return 1U;
-            case Anisotropy::Filter2x:
-                return 2U;
-            case Anisotropy::Filter4x:
-                return 4U;
-            case Anisotropy::Filter8x:
-                return 8U;
-            case Anisotropy::Filter16x:
-                return 16U;
-            }
-        }();
-        return static_cast<float>(std::max(1U << max_anisotropy, min_value));
-    }
+    std::array<float, 4> GetBorderColor() const noexcept;
+
+    float GetMaxAnisotropy() const noexcept;
 
     float GetMinLod() const {
         return static_cast<float>(min_lod_clamp) / 256.0f;
@@ -368,15 +367,6 @@ struct TSCEntry {
         constexpr u32 mask = 1U << (13 - 1);
         return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
     }
-
-    std::array<float, 4> GetBorderColor() const {
-        if (srgb_conversion) {
-            return {static_cast<float>(srgb_border_color_r) / 255.0f,
-                    static_cast<float>(srgb_border_color_g) / 255.0f,
-                    static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
-        }
-        return border_color;
-    }
 };
 static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
 
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index a5f81a8a0..f60bdc60a 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -15,13 +15,13 @@
 #endif
 #include "video_core/video_core.h"
 
-namespace VideoCore {
-
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
-                                             Core::System& system) {
+namespace {
+std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
+                                                        Core::System& system,
+                                                        Core::Frontend::GraphicsContext& context) {
     switch (Settings::values.renderer_backend) {
     case Settings::RendererBackend::OpenGL:
-        return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
+        return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context);
 #ifdef HAS_VULKAN
     case Settings::RendererBackend::Vulkan:
         return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
@@ -30,13 +30,23 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind
         return nullptr;
     }
 }
+} // Anonymous namespace
 
-std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {
-    if (Settings::values.use_asynchronous_gpu_emulation) {
-        return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer());
+namespace VideoCore {
+
+std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+    auto context = emu_window.CreateSharedContext();
+    const auto scope = context->Acquire();
+    auto renderer = CreateRenderer(emu_window, system, *context);
+    if (!renderer || !renderer->Init()) { // CreateRenderer returns nullptr for unhandled backends
+        return nullptr;
     }
 
-    return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer());
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer),
+                                                        std::move(context));
+    }
+    return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context));
 }
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index b8e0ac372..f5c27125d 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -22,17 +22,8 @@ namespace VideoCore {
 
 class RendererBase;
 
-/**
- * Creates a renderer instance.
- *
- * @note The returned renderer instance is simply allocated. Its Init()
- *       function still needs to be called to fully complete its setup.
- */
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
-                                             Core::System& system);
-
 /// Creates an emulated GPU instance using the given system context.
-std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system);
+std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer);