39 files changed, 831 insertions, 472 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index effe76a63..f7febd6a2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -148,6 +148,7 @@ add_library(video_core STATIC
     textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
+    textures/texture.cpp
     textures/texture.h
     video_core.cpp
     video_core.h
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index 4b9193182..e35ee0b67 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,37 +15,29 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
-        return cache_addr <= other_start && other_end <= cache_addr_end;
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+        return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }
 
-    u8* GetWritableHostPtr() const {
-        return FromCacheAddr(cache_addr);
+    std::size_t GetOffset(const VAddr in_addr) const {
+        return static_cast<std::size_t>(in_addr - cpu_addr); // distance from the block base
     }
 
-    u8* GetWritableHostPtr(std::size_t offset) const {
-        return FromCacheAddr(cache_addr + offset);
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
     }
 
-    std::size_t GetOffset(const CacheAddr in_addr) {
-        return static_cast<std::size_t>(in_addr - cache_addr);
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
     }
 
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
-    }
-
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
-    }
-
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + size;
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + size;
     }
 
     std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
     }
 
 protected:
-    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
-        SetCacheAddr(cache_addr);
+    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
+        SetCpuAddr(cpu_addr);
     }
     ~BufferBlock() = default;
 
 private:
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
+    VAddr cpu_addr{};
+    VAddr cpu_addr_end{};
     std::size_t size{};
     u64 epoch{};
 };
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 186aca61d..b57c0d4d4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -19,6 +19,7 @@
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "core/core.h"
+#include "core/memory.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -37,28 +38,45 @@ public:
                             bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};
 
-        auto& memory_manager = system.GPU().MemoryManager();
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
+        // Cached data is tracked by CPU address, so translate the GPU address up front.
+        const std::optional<VAddr> cpu_addr_opt =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+
+        if (!cpu_addr_opt) {
             return {GetEmptyBuffer(size), 0};
         }
-        const auto cache_addr = ToCacheAddr(host_ptr);
+
+        const VAddr cpu_addr = *cpu_addr_opt;
 
         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
+            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
+                // Unwritten data on this path skips the block cache and is uploaded directly.
+                // Resolve the upload source once for both upload paths: use the pointer from
+                // GetPointer() when IsGranularRange() accepts the range, otherwise copy the
+                // data into staging_buffer with ReadBlockUnsafe() and upload from there.
+                auto& memory_manager = system.GPU().MemoryManager();
+                u8* upload_ptr = nullptr;
+                if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                    upload_ptr = memory_manager.GetPointer(gpu_addr);
+                } else {
+                    staging_buffer.resize(size);
+                    memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                    upload_ptr = staging_buffer.data();
+                }
                 if (use_fast_cbuf) {
-                    return ConstBufferUpload(host_ptr, size);
+                    return ConstBufferUpload(upload_ptr, size);
                 } else {
-                    return StreamBufferUpload(host_ptr, size, alignment);
+                    return StreamBufferUpload(upload_ptr, size, alignment);
                 }
             }
         }
 
-        auto block = GetBlock(cache_addr, size);
-        auto map = MapAddress(block, gpu_addr, cache_addr, size);
+        // Everything else goes through the block cache.
+        auto block = GetBlock(cpu_addr, size);
+        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
             if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
             }
         }
 
-        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
+        const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
 
         return {ToHandle(block), offset};
     }
@@ -112,7 +130,7 @@ public:
     }
 
     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
     }
 
     /// Mark the specified region as being invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size) {
+    void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -152,7 +170,7 @@ protected:
 
     virtual void WriteBarrier() = 0;
 
-    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
+    virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
 
     virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                  const u8* data) = 0;
@@ -169,20 +187,17 @@ protected:
 
     /// Register an object into the cache
     void Register(const MapInterval& new_map, bool inherit_written = false) {
-        const CacheAddr cache_ptr = new_map->GetStart();
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
-        if (!cache_ptr || !cpu_addr) {
+        const VAddr cpu_addr = new_map->GetStart();
+        if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
                          new_map->GetGpuAddress());
             return;
         }
         const std::size_t size = new_map->GetEnd() - new_map->GetStart();
-        new_map->SetCpuAddress(*cpu_addr);
         new_map->MarkAsRegistered(true);
         const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
         mapped_addresses.insert({interval, new_map});
-        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
         if (inherit_written) {
             MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
             new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
     /// Unregisters an object from the cache
     void Unregister(MapInterval& map) {
         const std::size_t size = map->GetEnd() - map->GetStart();
-        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
+        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
         map->MarkAsRegistered(false);
         if (map->IsWritten()) {
             UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -202,32 +217,39 @@ protected:
     }
 
 private:
-    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
+    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
         return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
     }
 
-    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
-                           const CacheAddr cache_addr, const std::size_t size) {
+    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
+                           const std::size_t size) {
 
-        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
+        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
-            const CacheAddr cache_addr_end = cache_addr + size;
-            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
-            u8* host_ptr = FromCacheAddr(cache_addr);
-            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
+            auto& memory_manager = system.GPU().MemoryManager();
+            const VAddr cpu_addr_end = cpu_addr + size;
+            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
+            if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+            } else {
+                staging_buffer.resize(size);
+                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+            }
             Register(new_map);
             return new_map;
         }
 
-        const CacheAddr cache_addr_end = cache_addr + size;
+        const VAddr cpu_addr_end = cpu_addr + size;
         if (overlaps.size() == 1) {
             MapInterval& current_map = overlaps[0];
-            if (current_map->IsInside(cache_addr, cache_addr_end)) {
+            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
                 return current_map;
             }
         }
-        CacheAddr new_start = cache_addr;
-        CacheAddr new_end = cache_addr_end;
+        VAddr new_start = cpu_addr;
+        VAddr new_end = cpu_addr_end;
         bool write_inheritance = false;
         bool modified_inheritance = false;
         // Calculate new buffer parameters
@@ -237,7 +259,7 @@ private:
             write_inheritance |= overlap->IsWritten();
             modified_inheritance |= overlap->IsModified();
         }
-        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
+        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
         for (auto& overlap : overlaps) {
             Unregister(overlap);
         }
@@ -250,7 +272,7 @@ private:
         return new_map;
     }
 
-    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
+    void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
                      std::vector<MapInterval>& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
         for (auto& interval : interval_set) {
             std::size_t size = interval.upper() - interval.lower();
             if (size > 0) {
-                u8* host_ptr = FromCacheAddr(interval.lower());
-                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
+                staging_buffer.resize(size);
+                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(interval.lower()), size,
+                                staging_buffer.data());
             }
         }
     }
 
-    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
+    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
         if (size == 0) {
             return {};
         }
@@ -290,8 +314,9 @@ private:
     void FlushMap(MapInterval map) {
         std::size_t size = map->GetEnd() - map->GetStart();
         TBuffer block = blocks[map->GetStart() >> block_page_bits];
-        u8* host_ptr = FromCacheAddr(map->GetStart());
-        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
+        staging_buffer.resize(size);
+        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
+        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }
 
@@ -316,14 +341,14 @@ private:
     TBuffer EnlargeBlock(TBuffer buffer) {
         const std::size_t old_size = buffer->GetSize();
         const std::size_t new_size = old_size + block_page_size;
-        const CacheAddr cache_addr = buffer->GetCacheAddr();
-        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
+        const VAddr cpu_addr = buffer->GetCpuAddr();
+        TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
         CopyBlock(buffer, new_buffer, 0, 0, old_size);
         buffer->SetEpoch(epoch);
         pending_destruction.push_back(buffer);
-        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -334,9 +359,9 @@ private:
     TBuffer MergeBlocks(TBuffer first, TBuffer second) {
         const std::size_t size_1 = first->GetSize();
         const std::size_t size_2 = second->GetSize();
-        const CacheAddr first_addr = first->GetCacheAddr();
-        const CacheAddr second_addr = second->GetCacheAddr();
-        const CacheAddr new_addr = std::min(first_addr, second_addr);
+        const VAddr first_addr = first->GetCpuAddr();
+        const VAddr second_addr = second->GetCpuAddr();
+        const VAddr new_addr = std::min(first_addr, second_addr);
         const std::size_t new_size = size_1 + size_2;
         TBuffer new_buffer = CreateBlock(new_addr, new_size);
         CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
         second->SetEpoch(epoch);
         pending_destruction.push_back(first);
         pending_destruction.push_back(second);
-        const CacheAddr cache_addr_end = new_addr + new_size - 1;
+        const VAddr cpu_addr_end = new_addr + new_size - 1;
         u64 page_start = new_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -355,18 +380,18 @@ private:
         return new_buffer;
     }
 
-    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
+    TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
         TBuffer found{};
-        const CacheAddr cache_addr_end = cache_addr + size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             auto it = blocks.find(page_start);
             if (it == blocks.end()) {
                 if (found) {
                     found = EnlargeBlock(found);
                 } else {
-                    const CacheAddr start_addr = (page_start << block_page_bits);
+                    const VAddr start_addr = (page_start << block_page_bits);
                     found = CreateBlock(start_addr, block_page_size);
                     blocks[page_start] = found;
                 }
@@ -386,7 +411,7 @@ private:
         return found;
     }
 
-    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
         }
     }
 
-    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -416,7 +441,7 @@ private:
         }
     }
 
-    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
+    bool IsRegionWritten(const VAddr start, const VAddr end) const {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -440,8 +465,8 @@ private:
     u64 buffer_offset = 0;
     u64 buffer_offset_base = 0;
 
-    using IntervalSet = boost::icl::interval_set<CacheAddr>;
-    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
+    using IntervalSet = boost::icl::interval_set<VAddr>;
+    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
     using IntervalType = typename IntervalCache::interval_type;
     IntervalCache mapped_addresses;
 
@@ -456,6 +481,8 @@ private:
     u64 epoch = 0;
     u64 modified_ticks = 0;
 
+    std::vector<u8> staging_buffer;
+
     std::recursive_mutex mutex;
 };
 
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 3a104d5cd..b0956029d 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -11,7 +11,7 @@ namespace VideoCommon {
 
 class MapIntervalBase {
 public:
-    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
+    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
         : start{start}, end{end}, gpu_addr{gpu_addr} {}
 
     void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
         return gpu_addr;
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
         return (start <= other_start && other_end <= end);
     }
 
@@ -46,11 +46,11 @@ public:
         return is_registered;
     }
 
-    CacheAddr GetStart() const {
+    VAddr GetStart() const {
         return start;
     }
 
-    CacheAddr GetEnd() const {
+    VAddr GetEnd() const {
         return end;
     }
 
@@ -76,8 +76,8 @@ public:
     }
 
 private:
-    CacheAddr start;
-    CacheAddr end;
+    VAddr start;
+    VAddr end;
     GPUVAddr gpu_addr;
     VAddr cpu_addr{};
     bool is_written{};
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 930b605af..498936f0c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1712,6 +1712,7 @@ public:
         BRK,
         DEPBAR,
         VOTE,
+        VOTE_VTG,
         SHFL,
         FSWZADD,
         BFE_C,
@@ -1758,6 +1759,7 @@ public:
         IPA,
         OUT_R, // Emit vertex/primitive
         ISBERD,
+        BAR,
         MEMBAR,
         VMAD,
         VSETP,
@@ -1842,7 +1844,7 @@ public:
         MOV_C,
         MOV_R,
         MOV_IMM,
-        MOV_SYS,
+        S2R,
         MOV32_IMM,
         SHL_C,
         SHL_R,
@@ -2026,6 +2028,7 @@ private:
             INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
             INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
             INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
+            INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
             INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
             INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
@@ -2063,6 +2066,7 @@ private:
             INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
             INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
             INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
             INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
             INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
             INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
@@ -2134,7 +2138,7 @@ private:
             INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
             INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
             INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
-            INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"),
+            INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
             INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
             INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
             INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ced9d7e28..1a2d747be 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -270,13 +270,13 @@ public:
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 
 protected:
     virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 925be8d7b..cc434faf7 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     gpu_thread.SwapBuffers(framebuffer);
 }
 
-void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
     gpu_thread.FlushRegion(addr, size);
 }
 
-void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.InvalidateRegion(addr, size);
 }
 
-void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 265c62758..03fd0eef0 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -27,9 +27,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override;
 
 protected:
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index bd5278a5c..6f38a672a 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     renderer->SwapBuffers(framebuffer);
 }
 
-void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushRegion(addr, size);
 }
 
-void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().InvalidateRegion(addr, size);
 }
 
-void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 866a94c8c..4a6e9a01d 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -26,9 +26,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override {}
 
 protected:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 270c7ae0d..10cda686b 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 }
 
-void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
     PushCommand(FlushRegionCommand(addr, size));
 }
 
-void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
     system.Renderer().Rasterizer().InvalidateRegion(addr, size);
 }
 
-void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
     InvalidateRegion(addr, size);
 }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index be36c580e..cd74ad330 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -47,26 +47,26 @@ struct SwapBuffersCommand final {
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
         : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
@@ -111,13 +111,13 @@ public:
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(CacheAddr addr, u64 size);
+    void FlushRegion(VAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size);
+    void InvalidateRegion(VAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
 
     // Wait until the gpu thread is idle.
     void WaitIdle() const;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f5d33f27a..a3389d0d2 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
     ASSERT((gpu_addr & page_mask) == 0);
 
     const u64 aligned_size{Common::AlignUp(size, page_size)};
-    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
     const auto cpu_addr = GpuToCpuAddress(gpu_addr);
     ASSERT(cpu_addr);
 
     // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
+    system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
 
     UnmapRange(gpu_addr, aligned_size);
     ASSERT(system.CurrentProcess()
@@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
         return {};
     }
 
-    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    const u8* page_pointer{GetPointer(addr)}; // exact address: in-page offset already applied
     if (page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
         T value;
-        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+        std::memcpy(&value, page_pointer, sizeof(T));
         return value;
     }
 
@@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
         return;
     }
 
-    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    u8* page_pointer{GetPointer(addr)}; // exact address: in-page offset already applied
     if (page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
-        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+        std::memcpy(page_pointer, &data, sizeof(T));
         return;
     }
 
@@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
         return {};
     }
 
-    u8* const page_pointer{page_table.pointers[addr >> page_bits]};
-    if (page_pointer != nullptr) {
-        return page_pointer + (addr & page_mask);
+    auto& memory = system.Memory();
+
+    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
+
+    if (page_addr != 0) {
+        return memory.GetPointer(page_addr + (addr & page_mask));
     }
 
     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
         return {};
     }
 
-    const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
-    if (page_pointer != nullptr) {
-        return page_pointer + (addr & page_mask);
+    const auto& memory = system.Memory();
+
+    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
+
+    if (page_addr != 0) {
+        return memory.GetPointer(page_addr + (addr & page_mask));
     }
 
     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 
         switch (page_table.attributes[page_index]) {
         case Common::PageType::Memory: {
-            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
             // Flush must happen on the rasterizer interface, such that memory is always synchronous
             // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
-            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
-            std::memcpy(dest_buffer, src_ptr, copy_amount);
+            rasterizer.FlushRegion(src_addr, copy_amount);
+            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
             break;
         }
         default:
@@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
         const u8* page_pointer = page_table.pointers[page_index];
         if (page_pointer) {
-            const u8* src_ptr{page_pointer + page_offset};
-            std::memcpy(dest_buffer, src_ptr, copy_amount);
+            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
+            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
         } else {
             std::memset(dest_buffer, 0, copy_amount);
         }
@@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
     std::size_t page_index{dest_addr >> page_bits};
     std::size_t page_offset{dest_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 
         switch (page_table.attributes[page_index]) {
         case Common::PageType::Memory: {
-            u8* dest_ptr{page_table.pointers[page_index] + page_offset};
+            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
             // Invalidate must happen on the rasterizer interface, such that memory is always
             // synchronous when it is written (even when in asynchronous GPU mode).
-            rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
-            std::memcpy(dest_ptr, src_buffer, copy_amount);
+            rasterizer.InvalidateRegion(dest_addr, copy_amount);
+            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
             break;
         }
         default:
@@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
     std::size_t page_index{dest_addr >> page_bits};
     std::size_t page_offset{dest_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
         u8* page_pointer = page_table.pointers[page_index];
         if (page_pointer) {
-            u8* dest_ptr{page_pointer + page_offset};
-            std::memcpy(dest_ptr, src_buffer, copy_amount);
+            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
+            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
         }
         page_index++;
         page_offset = 0;
@@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
 }
 
 void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
-    std::size_t remaining_size{size};
-    std::size_t page_index{src_addr >> page_bits};
-    std::size_t page_offset{src_addr & page_mask};
-
-    while (remaining_size > 0) {
-        const std::size_t copy_amount{
-            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-
-        switch (page_table.attributes[page_index]) {
-        case Common::PageType::Memory: {
-            // Flush must happen on the rasterizer interface, such that memory is always synchronous
-            // when it is copied (even when in asynchronous GPU mode).
-            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
-            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
-            WriteBlock(dest_addr, src_ptr, copy_amount);
-            break;
-        }
-        default:
-            UNREACHABLE();
-        }
-
-        page_index++;
-        page_offset = 0;
-        dest_addr += static_cast<VAddr>(copy_amount);
-        src_addr += static_cast<VAddr>(copy_amount);
-        remaining_size -= copy_amount;
-    }
+    std::vector<u8> tmp_buffer(size);
+    ReadBlock(src_addr, tmp_buffer.data(), size);
+    WriteBlock(dest_addr, tmp_buffer.data(), size);
 }
 
 void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
@@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
     WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
 }
 
+bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
+    // backing_addr is indexed per GPU page; add the in-page offset like GetPointer() does.
+    const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits] + (gpu_addr & page_mask);
+    return (addr & Memory::PAGE_MASK) + size <= Memory::PAGE_SIZE;
+}
+
 void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
                              VAddr backing_addr) {
     LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 073bdb491..0d9468535 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -97,6 +97,11 @@ public:
     void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
     void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
 
+    /**
+     * Checks if a GPU region fits in one Memory::PAGE_SIZE page and can be read with a pointer
+     */
+    bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
+
 private:
     using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
     using VMAHandle = VMAMap::const_iterator;
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index e66054ed0..5ea2b01f2 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -98,12 +98,12 @@ public:
                                                       static_cast<QueryCache&>(*this),
                                                       VideoCore::QueryType::SamplesPassed}}} {}
 
-    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+    void InvalidateRegion(VAddr addr, std::size_t size) {
         std::unique_lock lock{mutex};
         FlushAndRemoveRegion(addr, size);
     }
 
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::unique_lock lock{mutex};
         FlushAndRemoveRegion(addr, size);
     }
@@ -117,14 +117,16 @@ public:
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
         std::unique_lock lock{mutex};
         auto& memory_manager = system.GPU().MemoryManager();
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
+        ASSERT(cpu_addr_opt);
+        VAddr cpu_addr = *cpu_addr_opt;
 
-        CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
+        CachedQuery* query = TryGet(cpu_addr);
         if (!query) {
-            const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
-            ASSERT_OR_EXECUTE(cpu_addr, return;);
+            ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
+            const auto host_ptr = memory_manager.GetPointer(gpu_addr);
 
-            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
+            query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
         }
 
         query->BindCounter(Stream(type).Current(), timestamp);
@@ -173,11 +175,11 @@ protected:
 
 private:
     /// Flushes a memory range to guest memory and removes it from the cache.
-    void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
+    void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
         const u64 addr_begin = static_cast<u64>(addr);
         const u64 addr_end = addr_begin + static_cast<u64>(size);
         const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
-            const u64 cache_begin = query.GetCacheAddr();
+            const u64 cache_begin = query.GetCpuAddr();
             const u64 cache_end = cache_begin + query.SizeInBytes();
             return cache_begin < addr_end && addr_begin < cache_end;
         };
@@ -193,7 +195,7 @@ private:
                 if (!in_range(query)) {
                     continue;
                 }
-                rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
+                rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
                 query.Flush();
             }
             contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
@@ -204,22 +206,21 @@ private:
     /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
     CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
         rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
-        const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
+        const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
         return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
                                                   host_ptr);
     }
 
     /// Tries to a get a cached query. Returns nullptr on failure.
-    CachedQuery* TryGet(CacheAddr addr) {
+    CachedQuery* TryGet(VAddr addr) {
         const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
         const auto it = cached_queries.find(page);
         if (it == std::end(cached_queries)) {
             return nullptr;
         }
         auto& contents = it->second;
-        const auto found =
-            std::find_if(std::begin(contents), std::end(contents),
-                         [addr](auto& query) { return query.GetCacheAddr() == addr; });
+        const auto found = std::find_if(std::begin(contents), std::end(contents),
+                                        [addr](auto& query) { return query.GetCpuAddr() == addr; });
         return found != std::end(contents) ? &*found : nullptr;
     }
 
@@ -323,14 +324,10 @@ public:
         timestamp = timestamp_;
     }
 
-    VAddr CpuAddr() const noexcept {
+    VAddr GetCpuAddr() const noexcept {
         return cpu_addr;
     }
 
-    CacheAddr GetCacheAddr() const noexcept {
-        return ToCacheAddr(host_ptr);
-    }
-
     u64 SizeInBytes() const noexcept {
         return SizeInBytes(timestamp.has_value());
     }
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 6de1597a2..22987751e 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -18,22 +18,14 @@
 
 class RasterizerCacheObject {
 public:
-    explicit RasterizerCacheObject(const u8* host_ptr)
-        : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
+    explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
 
     virtual ~RasterizerCacheObject();
 
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
     }
 
-    const u8* GetHostPtr() const {
-        return host_ptr;
-    }
-
-    /// Gets the address of the shader in guest memory, required for cache management
-    virtual VAddr GetCpuAddr() const = 0;
-
     /// Gets the size of the shader in guest memory, required for cache management
     virtual std::size_t GetSizeInBytes() const = 0;
 
@@ -68,8 +60,7 @@ private:
     bool is_registered{};      ///< Whether the object is currently registered with the cache
     bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
     u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
-    const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
-    CacheAddr cache_addr{};    ///< Cache address memory, unique from emulated virtual address space
+    VAddr cpu_addr{};          ///< Address of the object in guest memory, used as the cache key
 };
 
 template <class T>
@@ -80,7 +71,7 @@ public:
     explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
 
     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -90,7 +81,7 @@ public:
     }
 
     /// Mark the specified region as being invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size) {
+    void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
 
         const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -114,27 +105,20 @@ public:
 
 protected:
     /// Tries to get an object from the cache with the specified cache address
-    T TryGet(CacheAddr addr) const {
+    T TryGet(VAddr addr) const {
         const auto iter = map_cache.find(addr);
         if (iter != map_cache.end())
             return iter->second;
         return nullptr;
     }
 
-    T TryGet(const void* addr) const {
-        const auto iter = map_cache.find(ToCacheAddr(addr));
-        if (iter != map_cache.end())
-            return iter->second;
-        return nullptr;
-    }
-
     /// Register an object into the cache
     virtual void Register(const T& object) {
         std::lock_guard lock{mutex};
 
         object->SetIsRegistered(true);
         interval_cache.add({GetInterval(object), ObjectSet{object}});
-        map_cache.insert({object->GetCacheAddr(), object});
+        map_cache.insert({object->GetCpuAddr(), object});
         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
     }
 
@@ -144,7 +128,7 @@ protected:
 
         object->SetIsRegistered(false);
         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
-        const CacheAddr addr = object->GetCacheAddr();
+        const VAddr addr = object->GetCpuAddr();
         interval_cache.subtract({GetInterval(object), ObjectSet{object}});
         map_cache.erase(addr);
     }
@@ -173,7 +157,7 @@ protected:
 
 private:
     /// Returns a list of cached objects from the specified memory region, ordered by access time
-    std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
+    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
         if (size == 0) {
             return {};
         }
@@ -197,13 +181,13 @@ private:
     }
 
     using ObjectSet = std::set<T>;
-    using ObjectCache = std::unordered_map<CacheAddr, T>;
-    using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
+    using ObjectCache = std::unordered_map<VAddr, T>;
+    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
     using ObjectInterval = typename IntervalCache::interval_type;
 
     static auto GetInterval(const T& object) {
-        return ObjectInterval::right_open(object->GetCacheAddr(),
-                                          object->GetCacheAddr() + object->GetSizeInBytes());
+        return ObjectInterval::right_open(object->GetCpuAddr(),
+                                          object->GetCpuAddr() + object->GetSizeInBytes());
     }
 
     ObjectCache map_cache;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1a68e3caa..8ae5b9c4e 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -53,14 +53,14 @@ public:
     virtual void FlushAll() = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
-    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify the rasterizer to send all written commands to the host GPU.
     virtual void FlushCommands() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0375fca17..4eb37a96c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
 
-CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
-    : VideoCommon::BufferBlock{cache_addr, size} {
+CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
+    : VideoCommon::BufferBlock{cpu_addr, size} {
     gl_buffer.Create();
     glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
 }
@@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
     glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
 }
 
-Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
-    return std::make_shared<CachedBufferBlock>(cache_addr, size);
+Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<CachedBufferBlock>(cpu_addr, size);
 }
 
 void OGLBufferCache::WriteBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8c7145443..d94a11252 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf
 
 class CachedBufferBlock : public VideoCommon::BufferBlock {
 public:
-    explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
+    explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
     ~CachedBufferBlock();
 
     const GLuint* GetHandle() const {
@@ -55,7 +55,7 @@ public:
     }
 
 protected:
-    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
 
     void WriteBarrier() override;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 346feeb2f..368f399df 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -656,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
 
 void RasterizerOpenGL::FlushAll() {}
 
-void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    if (!addr || !size) {
+    if (addr == 0 || size == 0) {
         return;
     }
     texture_cache.FlushRegion(addr, size);
@@ -666,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
     query_cache.FlushRegion(addr, size);
 }
 
-void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    if (!addr || !size) {
+    if (addr == 0 || size == 0) {
         return;
     }
     texture_cache.InvalidateRegion(addr, size);
@@ -677,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
     query_cache.InvalidateRegion(addr, size);
 }
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     if (Settings::values.use_accurate_gpu_emulation) {
         FlushRegion(addr, size);
     }
@@ -716,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 
-    const auto surface{
-        texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
+    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
     if (!surface) {
         return {};
     }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2d3be2437..212dad852 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -65,9 +65,9 @@ public:
     void ResetCounter(VideoCore::QueryType type) override;
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 046ee55a5..6d2ff20f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
 
 } // Anonymous namespace
 
-CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
+CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
                            std::shared_ptr<VideoCommon::Shader::Registry> registry,
                            ShaderEntries entries, std::shared_ptr<OGLProgram> program)
-    : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
-      cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
+    : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
+      size_in_bytes{size_in_bytes}, program{std::move(program)} {}
 
 CachedShader::~CachedShader() = default;
 
@@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
-    return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
-                                                          size_in_bytes, std::move(registry),
-                                                          MakeEntries(ir), std::move(program)));
+    return std::shared_ptr<CachedShader>(new CachedShader(
+        params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
 }
 
 Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
     entry.bindless_samplers = registry->GetBindlessSamplers();
     params.disk_cache.SaveEntry(std::move(entry));
 
-    return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
-                                                          size_in_bytes, std::move(registry),
-                                                          MakeEntries(ir), std::move(program)));
+    return std::shared_ptr<CachedShader>(new CachedShader(
+        params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
 }
 
 Shader CachedShader::CreateFromCache(const ShaderParameters& params,
                                      const PrecompiledShader& precompiled_shader,
                                      std::size_t size_in_bytes) {
-    return std::shared_ptr<CachedShader>(new CachedShader(
-        params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
-        precompiled_shader.entries, precompiled_shader.program));
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
+                         precompiled_shader.entries, precompiled_shader.program));
 }
 
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     const GPUVAddr address{GetShaderAddress(system, program)};
 
     // Look up shader in the cache based on address
-    const auto host_ptr{memory_manager.GetPointer(address)};
-    Shader shader{TryGet(host_ptr)};
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
+    Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
     if (shader) {
         return last_shaders[static_cast<std::size_t>(program)] = shader;
     }
 
+    const auto host_ptr{memory_manager.GetPointer(address)};
+
     // No shader found - create a new one
     ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
     ProgramCode code_b;
@@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
     const auto unique_identifier = GetUniqueIdentifier(
         GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
-    const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
-    const ShaderParameters params{system,   disk_cache, device,
-                                  cpu_addr, host_ptr,   unique_identifier};
+
+    const ShaderParameters params{system,    disk_cache, device,
+                                  *cpu_addr, host_ptr,   unique_identifier};
 
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
@@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
 Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     auto& memory_manager{system.GPU().MemoryManager()};
-    const auto host_ptr{memory_manager.GetPointer(code_addr)};
-    auto kernel = TryGet(host_ptr);
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
+
+    auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
     if (kernel) {
         return kernel;
     }
 
+    const auto host_ptr{memory_manager.GetPointer(code_addr)};
     // No kernel found, create a new one
     auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
     const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
-    const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
-    const ShaderParameters params{system,   disk_cache, device,
-                                  cpu_addr, host_ptr,   unique_identifier};
+
+    const ShaderParameters params{system,    disk_cache, device,
+                                  *cpu_addr, host_ptr,   unique_identifier};
 
     const auto found = runtime_cache.find(unique_identifier);
     if (found == runtime_cache.end()) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 4935019fc..c836df5bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -65,11 +65,6 @@ public:
     /// Gets the GL program handle for the shader
     GLuint GetHandle() const;
 
-    /// Returns the guest CPU address of the shader
-    VAddr GetCpuAddr() const override {
-        return cpu_addr;
-    }
-
     /// Returns the size in bytes of the shader
     std::size_t GetSizeInBytes() const override {
         return size_in_bytes;
@@ -90,13 +85,12 @@ public:
                                   std::size_t size_in_bytes);
 
 private:
-    explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
+    explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
                           ShaderEntries entries, std::shared_ptr<OGLProgram> program);
 
     std::shared_ptr<VideoCommon::Shader::Registry> registry;
     ShaderEntries entries;
-    VAddr cpu_addr = 0;
     std::size_t size_in_bytes = 0;
     std::shared_ptr<OGLProgram> program;
 };
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1ba544943..326d74f29 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
 } // Anonymous namespace
 
 CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
-                                     CacheAddr cache_addr, std::size_t size)
-    : VideoCommon::BufferBlock{cache_addr, size} {
+                                     VAddr cpu_addr, std::size_t size)
+    : VideoCommon::BufferBlock{cpu_addr, size} {
     const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
                                          BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
                                              vk::BufferUsageFlagBits::eTransferDst,
@@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
 
 VKBufferCache::~VKBufferCache() = default;
 
-Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
-    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
+Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { // new shared block
+    return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
 }
 
 const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3f38eed0c..508214618 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -30,7 +30,7 @@ class VKScheduler;
 class CachedBufferBlock final : public VideoCommon::BufferBlock {
 public:
     explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
-                               CacheAddr cache_addr, std::size_t size);
+                               VAddr cpu_addr, std::size_t size);
     ~CachedBufferBlock();
 
     const vk::Buffer* GetHandle() const {
@@ -55,7 +55,7 @@ public:
 protected:
     void WriteBarrier() override {}
 
-    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
 
     const vk::Buffer* ToHandle(const Buffer& buffer) override;
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 557b9d662..c2a426aeb 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
 } // Anonymous namespace
 
 CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
-                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
-                           ProgramCode program_code, u32 main_offset)
-    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
-      program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
-      shader_ir{this->program_code, main_offset, compiler_settings, registry},
+                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
+                           u32 main_offset) // cpu_addr goes to the RasterizerCacheObject base
+    : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
+      registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
+                                                           compiler_settings, registry},
       entries{GenerateShaderEntries(shader_ir)} {}
 
 CachedShader::~CachedShader() = default;
@@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
 
         auto& memory_manager{system.GPU().MemoryManager()};
         const GPUVAddr program_addr{GetShaderAddress(system, program)};
-        const auto host_ptr{memory_manager.GetPointer(program_addr)};
-        auto shader = TryGet(host_ptr);
+        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+        ASSERT(cpu_addr);
+        auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
         if (!shader) {
+            const auto host_ptr{memory_manager.GetPointer(program_addr)};
+
             // No shader found - create a new one
             constexpr u32 stage_offset = 10;
             const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
             auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
 
-            const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
-            ASSERT(cpu_addr);
-
             shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
-                                                    host_ptr, std::move(code), stage_offset);
+                                                    std::move(code), stage_offset);
             Register(shader);
         }
         shaders[index] = std::move(shader);
@@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 
     auto& memory_manager = system.GPU().MemoryManager();
     const auto program_addr = key.shader;
-    const auto host_ptr = memory_manager.GetPointer(program_addr);
 
-    auto shader = TryGet(host_ptr);
+    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+    ASSERT(cpu_addr);
+
+    auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
     if (!shader) {
         // No shader found - create a new one
-        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
-        ASSERT(cpu_addr);
+        const auto host_ptr = memory_manager.GetPointer(program_addr);
 
         auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
         constexpr u32 kernel_main_offset = 0;
         shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
-                                                program_addr, *cpu_addr, host_ptr, std::move(code),
+                                                program_addr, *cpu_addr, std::move(code),
                                                 kernel_main_offset);
         Register(shader);
     }
@@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
         }
 
         const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-        const auto shader = TryGet(host_ptr);
+        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+        ASSERT(cpu_addr);
+        const auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; // no deref when empty
         ASSERT(shader);
 
         const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c4c112290..27c01732f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -113,17 +113,13 @@ namespace Vulkan {
 class CachedShader final : public RasterizerCacheObject {
 public:
     explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-                          VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
+                          VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
     ~CachedShader();
 
     GPUVAddr GetGpuAddr() const {
         return gpu_addr;
     }
 
-    VAddr GetCpuAddr() const override {
-        return cpu_addr;
-    }
-
     std::size_t GetSizeInBytes() const override {
         return program_code.size() * sizeof(u64);
     }
@@ -149,7 +145,6 @@ private:
                                                                  Tegra::Engines::ShaderType stage);
 
     GPUVAddr gpu_addr{};
-    VAddr cpu_addr{};
     ProgramCode program_code;
     VideoCommon::Shader::Registry registry;
     VideoCommon::Shader::ShaderIR shader_ir;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 58c69b786..0a2ea4fd4 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
 
 void RasterizerVulkan::FlushAll() {}
 
-void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
+void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { // flushes all three caches
+    if (addr == 0 || size == 0) { // null address or empty range: nothing to do
+        return;
+    }
     texture_cache.FlushRegion(addr, size);
     buffer_cache.FlushRegion(addr, size);
     query_cache.FlushRegion(addr, size);
 }
 
-void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
+void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { // invalidates all four caches
+    if (addr == 0 || size == 0) { // null address or empty range: nothing to do
+        return;
+    }
     texture_cache.InvalidateRegion(addr, size);
     pipeline_cache.InvalidateRegion(addr, size);
     buffer_cache.InvalidateRegion(addr, size);
     query_cache.InvalidateRegion(addr, size);
 }
 
-void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { // flush, then invalidate
     FlushRegion(addr, size);
     InvalidateRegion(addr, size);
 }
@@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
         return false;
     }
 
-    const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
-    const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
+    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
     if (!surface) {
         return false;
     }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 3185868e9..f642dde76 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -118,9 +118,9 @@ public:
     void ResetCounter(VideoCore::QueryType type) override;
     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
     void FlushAll() override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void FlushCommands() override;
     void TickFrame() override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index d2fe4ec5d..0dd7a1196 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -13,13 +13,247 @@
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/textures/texture.h"
 
 namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
+using Tegra::Shader::StoreType;
+using Tegra::Texture::ComponentType;
+using Tegra::Texture::TextureFormat;
+using Tegra::Texture::TICEntry;
 
 namespace {
+
+ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
+                               std::size_t component) { // component: index 0..3
+    const TextureFormat format{descriptor.format}; // layout decides which channel an index maps to
+    switch (format) {
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1: // index order: r, g, b, a
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.b_type;
+        }
+        if (component == 3) {
+            return descriptor.a_type;
+        }
+        break;
+    case TextureFormat::A8R8G8B8: // index order: a, r, g, b
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.b_type;
+        }
+        break;
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5: // index order: a, b, g, r
+        if (component == 0) {
+            return descriptor.a_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        if (component == 3) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::R32_B24G8: // index order: r, b, g
+        if (component == 0) {
+            return descriptor.r_type;
+        }
+        if (component == 1) {
+            return descriptor.b_type;
+        }
+        if (component == 2) {
+            return descriptor.g_type;
+        }
+        break;
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5: // index order: b, g, r
+        if (component == 0) {
+            return descriptor.b_type;
+        }
+        if (component == 1) {
+            return descriptor.g_type;
+        }
+        if (component == 2) {
+            return descriptor.r_type;
+        }
+        break;
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4: // index order: g, r
+        if (component == 0) {
+            return descriptor.g_type;
+        }
+        if (component == 1) {
+            return descriptor.r_type;
+        }
+        break;
+    } // unlisted formats and unmatched component indices fall through to here
+    UNIMPLEMENTED_MSG("texture format not implement={}", format);
+    return ComponentType::FLOAT; // fallback after reporting
+}
+
+bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
+    constexpr u8 R = 0b0001; // component 0
+    constexpr u8 G = 0b0010; // component 1
+    constexpr u8 B = 0b0100; // component 2
+    constexpr u8 A = 0b1000; // component 3
+    constexpr std::array<u8, 16> mask = { // mask[i] == i: every R/G/B/A combination
+        0,   (R),     (G),     (R | G),     (B),     (R | B),     (G | B),     (R | G | B),
+        (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
+    return std::bitset<4>{mask.at(component_mask)}.test(component); // throws if out of range
+}
+
+u32 GetComponentSize(TextureFormat format, std::size_t component) { // width in bits, 0 if absent
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+        return 32;
+    case TextureFormat::R16_G16_B16_A16:
+        return 16;
+    case TextureFormat::R32_G32_B32:
+        return component <= 2 ? 32 : 0;
+    case TextureFormat::R32_G32:
+        return component <= 1 ? 32 : 0;
+    case TextureFormat::R16_G16:
+        return component <= 1 ? 16 : 0;
+    case TextureFormat::R32:
+        return component == 0 ? 32 : 0;
+    case TextureFormat::R16:
+        return component == 0 ? 16 : 0;
+    case TextureFormat::R8:
+        return component == 0 ? 8 : 0;
+    case TextureFormat::R1:
+        return component == 0 ? 1 : 0;
+    case TextureFormat::A8R8G8B8:
+        return 8;
+    case TextureFormat::A2B10G10R10: // index 0 is the 2-bit component
+        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
+    case TextureFormat::A4B4G4R4:
+        return 4;
+    case TextureFormat::A5B5G5R1: // index 3 is the 1-bit component
+        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
+    case TextureFormat::A1B5G5R5: // index 0 is the 1-bit component
+        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
+    case TextureFormat::R32_B24G8:
+        if (component == 0) {
+            return 32;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        if (component == 2) {
+            return 8;
+        }
+        return 0;
+    case TextureFormat::B5G6R5:
+        if (component == 0 || component == 2) {
+            return 5;
+        }
+        if (component == 1) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::B6G5R5:
+        if (component == 1 || component == 2) {
+            return 5;
+        }
+        if (component == 0) {
+            return 6;
+        }
+        return 0;
+    case TextureFormat::G8R24:
+        if (component == 0) {
+            return 8;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        return 0;
+    case TextureFormat::G24R8: // NOTE(review): same sizes as G8R24 above - confirm intended
+        if (component == 0) {
+            return 8;
+        }
+        if (component == 1) {
+            return 24;
+        }
+        return 0;
+    case TextureFormat::G8R8:
+        return (component == 0 || component == 1) ? 8 : 0;
+    case TextureFormat::G4R4:
+        return (component == 0 || component == 1) ? 4 : 0;
+    default: // any format not listed above is reported and treated as size 0
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return 0;
+    }
+}
+
+std::size_t GetImageComponentMask(TextureFormat format) { // bit i set: component i is present
+    constexpr u8 R = 0b0001;
+    constexpr u8 G = 0b0010;
+    constexpr u8 B = 0b0100;
+    constexpr u8 A = 0b1000;
+    switch (format) {
+    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::A8R8G8B8:
+    case TextureFormat::A2B10G10R10:
+    case TextureFormat::A4B4G4R4:
+    case TextureFormat::A5B5G5R1:
+    case TextureFormat::A1B5G5R5: // four-component formats
+        return std::size_t{R | G | B | A};
+    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32_B24G8:
+    case TextureFormat::B5G6R5:
+    case TextureFormat::B6G5R5: // three-component formats
+        return std::size_t{R | G | B};
+    case TextureFormat::R32_G32:
+    case TextureFormat::R16_G16:
+    case TextureFormat::G8R24:
+    case TextureFormat::G24R8:
+    case TextureFormat::G8R8:
+    case TextureFormat::G4R4: // two-component formats
+        return std::size_t{R | G};
+    case TextureFormat::R32:
+    case TextureFormat::R16:
+    case TextureFormat::R8:
+    case TextureFormat::R1: // single-component formats
+        return std::size_t{R};
+    default: // unlisted formats are reported and treated as four-component
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return std::size_t{R | G | B | A};
+    }
+}
+
 std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
     switch (image_type) {
     case Tegra::Shader::ImageType::Texture1D:
@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
 }
 } // Anonymous namespace
 
+std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
+                                                  Node original_value) { // -> {value, is_signed}
+    switch (component_type) {
+    case ComponentType::SNORM: {
+        // range [-1.0, 1.0]; 64-bit shift so a 32-bit component does not overflow
+        const float scale = static_cast<float>(1ULL << component_size) / 2.f - 1.f;
+        auto cnv_value = Operation(OperationCode::FMul, original_value, Immediate(scale));
+        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
+        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
+    }
+    case ComponentType::SINT:
+    case ComponentType::UNORM: {
+        const bool is_signed = component_type == ComponentType::SINT;
+        // range [0.0, 1.0]; 64-bit shift so a 32-bit component does not overflow
+        const float scale = static_cast<float>(1ULL << component_size) - 1.f;
+        auto cnv_value = Operation(OperationCode::FMul, original_value, Immediate(scale));
+        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
+                is_signed};
+    }
+    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
+        return {std::move(original_value), false};
+    case ComponentType::FLOAT:
+        if (component_size == 16) { // 16-bit floats go through HCastFloat
+            return {Operation(OperationCode::HCastFloat, original_value), true};
+        } else {
+            return {std::move(original_value), true};
+        }
+    default: // unknown type: report it and pass the value through unchanged
+        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+        return {std::move(original_value), true};
+    }
+}
+
 u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
 
     switch (opcode->get().GetId()) {
     case OpCode::Id::SULD: {
-        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
         UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                          Tegra::Shader::OutOfBoundsStore::Ignore);
 
@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                                               : GetBindlessImage(instr.gpr39, type)};
         image.MarkRead();
 
-        u32 indexer = 0;
-        for (u32 element = 0; element < 4; ++element) {
-            if (!instr.suldst.IsComponentEnabled(element)) {
-                continue;
+        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
+            u32 indexer = 0;
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.suldst.IsComponentEnabled(element)) {
+                    continue;
+                }
+                MetaImage meta{image, {}, element};
+                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
+                SetTemporary(bb, indexer++, std::move(value));
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
+            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
+                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
+
+            auto descriptor = [this, instr] {
+                std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
+                if (instr.suldst.is_immediate) {
+                    descriptor =
+                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
+                } else {
+                    const Node image_register = GetRegister(instr.gpr39);
+                    const auto [base_image, buffer, offset] = TrackCbuf(
+                        image_register, global_code, static_cast<s64>(global_code.size()));
+                    descriptor = registry.ObtainBindlessSampler(buffer, offset);
+                }
+                if (!descriptor) {
+                    UNREACHABLE_MSG("Failed to obtain image descriptor");
+                }
+                return *descriptor;
+            }();
+
+            const auto comp_mask = GetImageComponentMask(descriptor.format);
+
+            switch (instr.suldst.GetStoreDataLayout()) {
+            case StoreType::Bits32:
+            case StoreType::Bits64: {
+                u32 indexer = 0;
+                u32 shifted_counter = 0;
+                Node value = Immediate(0);
+                for (u32 element = 0; element < 4; ++element) {
+                    if (!IsComponentEnabled(comp_mask, element)) {
+                        continue;
+                    }
+                    const auto component_type = GetComponentType(descriptor, element);
+                    const auto component_size = GetComponentSize(descriptor.format, element);
+                    MetaImage meta{image, {}, element};
+
+                    auto [converted_value, is_signed] = GetComponentValue(
+                        component_type, component_size,
+                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
+
+                    // shift element to correct position
+                    const auto shifted = shifted_counter;
+                    if (shifted > 0) {
+                        converted_value =
+                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
+                                            std::move(converted_value), Immediate(shifted));
+                    }
+                    shifted_counter += component_size;
+
+                    // add value into result
+                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
+
+                    // once a full 32-bit word has been packed -> we save it into temp
+                    if (shifted_counter >= 32) {
+                        SetTemporary(bb, indexer++, std::move(value));
+                        // reset counter and value to prepare packing the next 32-bit word
+                        value = Immediate(0);
+                        shifted_counter = 0;
+                    }
+                }
+                for (u32 i = 0; i < indexer; ++i) {
+                    SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+                }
+                break;
+            }
+            default:
+                UNREACHABLE();
+                break;
             }
-            MetaImage meta{image, {}, element};
-            Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
-            SetTemporary(bb, indexer++, std::move(value));
-        }
-        for (u32 i = 0; i < indexer; ++i) {
-            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
         }
         break;
     }
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index e6edec459..d4f95b18c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -71,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         bb.push_back(Operation(OperationCode::Discard));
         break;
     }
-    case OpCode::Id::MOV_SYS: {
+    case OpCode::Id::S2R: {
         const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::LaneId:
-                LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
+                LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
                 return Immediate(0U);
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
                 return Operation(OperationCode::YNegate);
             case SystemVariable::InvocationInfo:
-                LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+                LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorXY:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
+                return Immediate(0U);
+            case SystemVariable::WscaleFactorZ:
+                UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
                 return Immediate(0U);
             case SystemVariable::Tid: {
                 Node value = Immediate(0);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index baf7188d2..8852c8a1b 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
     switch (cc) {
     case Tegra::Shader::ConditionCode::NEU:
         return GetInternalFlag(InternalFlag::Zero, true);
+    case Tegra::Shader::ConditionCode::FCSM_TR:
+        UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
+        return MakeNode<PredicateNode>(Pred::NeverExecute, false);
     default:
         UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
         return MakeNode<PredicateNode>(Pred::NeverExecute, false);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 80fc9b82c..ca6c976c9 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -312,6 +312,10 @@ private:
     /// Conditionally saturates a half float pair
     Node GetSaturatedHalfFloat(Node value, bool saturate = true);
 
+    /// Get image component value by type and size
+    std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
+                                            u32 component_size, Node original_value);
+
     /// Returns a predicate comparing two floats
     Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
     /// Returns a predicate comparing two integers
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 6fe815135..7af0e792c 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
     MICROPROFILE_SCOPE(GPU_Load_Texture);
     auto& staging_buffer = staging_cache.GetBuffer(0);
     u8* host_ptr;
-    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);
-
-    // Handle continuouty
-    if (is_continuous) {
-        // Use physical memory directly
-        host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
-            return;
-        }
-    } else {
-        // Use an extra temporal buffer
-        auto& tmp_buffer = staging_cache.GetBuffer(1);
-        tmp_buffer.resize(guest_memory_size);
-        host_ptr = tmp_buffer.data();
-        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
-    }
+    // Use an extra temporary buffer
+    auto& tmp_buffer = staging_cache.GetBuffer(1);
+    tmp_buffer.resize(guest_memory_size);
+    host_ptr = tmp_buffer.data();
+    memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
 
     if (params.is_tiled) {
         ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
@@ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
     auto& staging_buffer = staging_cache.GetBuffer(0);
     u8* host_ptr;
 
-    // Handle continuouty
-    if (is_continuous) {
-        // Use physical memory directly
-        host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
-            return;
-        }
-    } else {
-        // Use an extra temporal buffer
-        auto& tmp_buffer = staging_cache.GetBuffer(1);
-        tmp_buffer.resize(guest_memory_size);
-        host_ptr = tmp_buffer.data();
-    }
+    // Use an extra temporary buffer
+    auto& tmp_buffer = staging_cache.GetBuffer(1);
+    tmp_buffer.resize(guest_memory_size);
+    host_ptr = tmp_buffer.data();
 
     if (params.is_tiled) {
         ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
@@ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
             }
         }
     }
-    if (!is_continuous) {
-        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
-    }
+    memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
 }
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index d7882a031..a39a8661b 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -68,8 +68,8 @@ public:
         return gpu_addr;
     }
 
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start); // touching ranges do not overlap
     }
 
     bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
@@ -86,21 +86,13 @@ public:
         return cpu_addr;
     }
 
-    void SetCpuAddr(const VAddr new_addr) {
-        cpu_addr = new_addr;
-    }
-
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
-    }
-
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
     }
 
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + guest_memory_size;
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + guest_memory_size;
     }
 
     const SurfaceParams& GetSurfaceParams() const {
@@ -119,14 +111,6 @@ public:
         return mipmap_sizes[level];
     }
 
-    void MarkAsContinuous(const bool is_continuous) {
-        this->is_continuous = is_continuous;
-    }
-
-    bool IsContinuous() const {
-        return is_continuous;
-    }
-
     bool IsLinear() const {
         return !params.is_tiled;
     }
@@ -175,10 +159,8 @@ protected:
     std::size_t guest_memory_size;
     std::size_t host_memory_size;
     GPUVAddr gpu_addr{};
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
     VAddr cpu_addr{};
-    bool is_continuous{};
+    VAddr cpu_addr_end{};
     bool is_converted{};
 
     std::vector<std::size_t> mipmap_sizes;
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 47b2aafbd..6f3ef45be 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
         params.height = tic.Height();
         params.depth = tic.Depth();
         params.pitch = params.is_tiled ? 0 : tic.Pitch();
-        if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
-            params.depth = 1;
-        } else if (params.target == SurfaceTarget::TextureCubemap ||
-                   params.target == SurfaceTarget::TextureCubeArray) {
+        if (params.target == SurfaceTarget::TextureCubemap ||
+            params.target == SurfaceTarget::TextureCubeArray) {
             params.depth *= 6;
         }
         params.num_levels = tic.max_mip_level + 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c8f8d659d..88fe3e25f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
 
 template <typename TSurface, typename TView>
 class TextureCache {
-    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
-    using IntervalType = typename IntervalMap::interval_type;
 
 public:
-    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+    void InvalidateRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@@ -76,7 +74,7 @@ public:
         guard_samplers = new_guard;
     }
 
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         auto surfaces = GetSurfacesInRegion(addr, size);
@@ -99,9 +97,9 @@ public:
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
 
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
-        if (!cache_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
 
@@ -110,7 +108,7 @@ public:
         }
 
         const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
+        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
         if (guard_samplers) {
             sampled_textures.push_back(surface);
         }
@@ -124,13 +122,13 @@ public:
         if (!gpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
-        if (!cache_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
         }
         const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
+        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
         if (guard_samplers) {
             sampled_textures.push_back(surface);
         }
@@ -159,14 +157,14 @@ public:
             SetEmptyDepthBuffer();
             return {};
         }
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
-        if (!cache_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             SetEmptyDepthBuffer();
             return {};
         }
         const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
-        auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
+        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
         if (depth_buffer.target)
             depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
         depth_buffer.target = surface_view.first;
@@ -199,15 +197,15 @@ public:
             return {};
         }
 
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
-        if (!cache_addr) {
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+        if (!cpu_addr) {
             SetEmptyColorBuffer(index);
             return {};
         }
 
         auto surface_view =
-            GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index),
+            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                        preserve_contents, true);
         if (render_targets[index].target)
             render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
@@ -257,27 +255,26 @@ public:
         const GPUVAddr src_gpu_addr = src_config.Address();
         const GPUVAddr dst_gpu_addr = dst_config.Address();
         DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
-        const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)};
-        const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)};
-        const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)};
-        const auto src_cache_addr{ToCacheAddr(src_host_ptr)};
+        const std::optional<VAddr> dst_cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
+        const std::optional<VAddr> src_cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
         std::pair<TSurface, TView> dst_surface =
-            GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false);
+            GetSurface(dst_gpu_addr, dst_cpu_addr.value_or(0), dst_params, true, false);
         std::pair<TSurface, TView> src_surface =
-            GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false);
+            GetSurface(src_gpu_addr, src_cpu_addr.value_or(0), src_params, true, false);
         ImageBlit(src_surface.second, dst_surface.second, copy_config);
         dst_surface.first->MarkAsModified(true, Tick());
     }
 
-    TSurface TryFindFramebufferSurface(const u8* host_ptr) {
-        const CacheAddr cache_addr = ToCacheAddr(host_ptr);
-        if (!cache_addr) {
+    TSurface TryFindFramebufferSurface(VAddr addr) {
+        if (!addr) {
             return nullptr;
         }
-        const CacheAddr page = cache_addr >> registry_page_bits;
+        const VAddr page = addr >> registry_page_bits;
         std::vector<TSurface>& list = registry[page];
         for (auto& surface : list) {
-            if (surface->GetCacheAddr() == cache_addr) {
+            if (surface->GetCpuAddr() == addr) {
                 return surface;
             }
         }
@@ -338,18 +335,14 @@ protected:
 
     void Register(TSurface surface) {
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
-        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
         const std::size_t size = surface->GetSizeInBytes();
         const std::optional<VAddr> cpu_addr =
             system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
-        if (!cache_ptr || !cpu_addr) {
+        if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                          gpu_addr);
             return;
         }
-        const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
-        surface->MarkAsContinuous(continuous);
-        surface->SetCacheAddr(cache_ptr);
         surface->SetCpuAddr(*cpu_addr);
         RegisterInnerCache(surface);
         surface->MarkAsRegistered(true);
@@ -634,7 +627,7 @@ private:
     std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
                                                                const SurfaceParams& params,
                                                                const GPUVAddr gpu_addr,
-                                                               const CacheAddr cache_addr,
+                                                               const VAddr cpu_addr,
                                                                bool preserve_contents) {
         if (params.target == SurfaceTarget::Texture3D) {
             bool failed = false;
@@ -659,7 +652,7 @@ private:
                     failed = true;
                     break;
                 }
-                const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
+                const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
                 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
                 modified |= surface->IsModified();
                 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
@@ -679,7 +672,7 @@ private:
         } else {
             for (const auto& surface : overlaps) {
                 if (!surface->MatchTarget(params.target)) {
-                    if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
+                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
                         if (Settings::values.use_accurate_gpu_emulation) {
                             return std::nullopt;
                         }
@@ -688,7 +681,7 @@ private:
                     }
                     return std::nullopt;
                 }
-                if (surface->GetCacheAddr() != cache_addr) {
+                if (surface->GetCpuAddr() != cpu_addr) {
                     continue;
                 }
                 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
@@ -722,13 +715,13 @@ private:
      *                          left blank.
      * @param is_render         Whether or not the surface is a render target.
      **/
-    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
+    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
                                           const SurfaceParams& params, bool preserve_contents,
                                           bool is_render) {
         // Step 1
         // Check Level 1 Cache for a fast structural match. If candidate surface
         // matches at certain level we are pretty much done.
-        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
+        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
             TSurface& current_surface = iter->second;
             const auto topological_result = current_surface->MatchesTopology(params);
             if (topological_result != MatchTopologyResult::FullMatch) {
@@ -755,7 +748,7 @@ private:
         // Step 2
         // Obtain all possible overlaps in the memory region
         const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
+        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
 
         // If none are found, we are done. we just load the surface and create it.
         if (overlaps.empty()) {
@@ -777,7 +770,7 @@ private:
         // Check if it's a 3D texture
         if (params.block_depth > 0) {
             auto surface =
-                Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
+                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
             if (surface) {
                 return *surface;
             }
@@ -852,16 +845,16 @@ private:
      * @param params   The parameters on the candidate surface.
      **/
     Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
-        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
-        const auto cache_addr{ToCacheAddr(host_ptr)};
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
 
-        if (!cache_addr) {
+        if (!cpu_addr) {
             Deduction result{};
             result.type = DeductionType::DeductionFailed;
             return result;
         }
 
-        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
+        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
             TSurface& current_surface = iter->second;
             const auto topological_result = current_surface->MatchesTopology(params);
             if (topological_result != MatchTopologyResult::FullMatch) {
@@ -880,7 +873,7 @@ private:
         }
 
         const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
+        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
 
         if (overlaps.empty()) {
             Deduction result{};
@@ -1024,10 +1017,10 @@ private:
     }
 
     void RegisterInnerCache(TSurface& surface) {
-        const CacheAddr cache_addr = surface->GetCacheAddr();
-        CacheAddr start = cache_addr >> registry_page_bits;
-        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
-        l1_cache[cache_addr] = surface;
+        const VAddr cpu_addr = surface->GetCpuAddr();
+        VAddr start = cpu_addr >> registry_page_bits;
+        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
+        l1_cache[cpu_addr] = surface;
         while (start <= end) {
             registry[start].push_back(surface);
             start++;
@@ -1035,10 +1028,10 @@ private:
     }
 
     void UnregisterInnerCache(TSurface& surface) {
-        const CacheAddr cache_addr = surface->GetCacheAddr();
-        CacheAddr start = cache_addr >> registry_page_bits;
-        const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
-        l1_cache.erase(cache_addr);
+        const VAddr cpu_addr = surface->GetCpuAddr();
+        VAddr start = cpu_addr >> registry_page_bits;
+        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
+        l1_cache.erase(cpu_addr);
         while (start <= end) {
             auto& reg{registry[start]};
             reg.erase(std::find(reg.begin(), reg.end(), surface));
@@ -1046,18 +1039,18 @@ private:
         }
     }
 
-    std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
+    std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
         if (size == 0) {
             return {};
         }
-        const CacheAddr cache_addr_end = cache_addr + size;
-        CacheAddr start = cache_addr >> registry_page_bits;
-        const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + size;
+        VAddr start = cpu_addr >> registry_page_bits;
+        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
         std::vector<TSurface> surfaces;
         while (start <= end) {
             std::vector<TSurface>& list = registry[start];
             for (auto& surface : list) {
-                if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
+                if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
                     surface->MarkAsPicked(true);
                     surfaces.push_back(surface);
                 }
@@ -1146,14 +1139,14 @@ private:
     // large in size.
     static constexpr u64 registry_page_bits{20};
     static constexpr u64 registry_page_size{1 << registry_page_bits};
-    std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
+    std::unordered_map<VAddr, std::vector<TSurface>> registry;
 
     static constexpr u32 DEPTH_RT = 8;
     static constexpr u32 NO_RT = 0xFFFFFFFF;
 
     // The L1 Cache is used for fast texture lookup before checking the overlaps
     // This avoids calculating size and other stuffs.
-    std::unordered_map<CacheAddr, TSurface> l1_cache;
+    std::unordered_map<VAddr, TSurface> l1_cache;
 
     /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
     /// previously been used. This is to prevent surfaces from being constantly created and
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
new file mode 100644
index 000000000..d1939d744
--- /dev/null
+++ b/src/video_core/textures/texture.cpp
@@ -0,0 +1,80 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+
+#include "core/settings.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra::Texture {
+
+namespace {
+
+constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
+    0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
+    0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
+    0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
+    0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
+    0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
+    0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
+    0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
+    0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
+    0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
+    0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
+    0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
+    0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
+    0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
+    0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
+    0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
+    0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
+    0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
+    0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
+    0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
+    0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
+    0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
+    0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
+    0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
+    0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
+    0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
+    0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
+    0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
+    0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
+    0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
+    0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
+    0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
+    0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
+};
+
+unsigned SettingsMinimumAnisotropy() noexcept {
+    switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
+    default:
+    case Anisotropy::Default:
+        return 1U;
+    case Anisotropy::Filter2x:
+        return 2U;
+    case Anisotropy::Filter4x:
+        return 4U;
+    case Anisotropy::Filter8x:
+        return 8U;
+    case Anisotropy::Filter16x:
+        return 16U;
+    }
+}
+
+} // Anonymous namespace
+
+std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
+    if (!srgb_conversion) {
+        return border_color;
+    }
+    return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
+            SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
+}
+
+float TSCEntry::GetMaxAnisotropy() const noexcept {
+    return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
+}
+
+} // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7edc4abe1..59b8a5e66 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -8,7 +8,6 @@
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_types.h"
-#include "core/settings.h"
 
 namespace Tegra::Texture {
 
@@ -336,24 +335,9 @@ struct TSCEntry {
         std::array<u8, 0x20> raw;
     };
 
-    float GetMaxAnisotropy() const {
-        const u32 min_value = [] {
-            switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
-            default:
-            case Anisotropy::Default:
-                return 1U;
-            case Anisotropy::Filter2x:
-                return 2U;
-            case Anisotropy::Filter4x:
-                return 4U;
-            case Anisotropy::Filter8x:
-                return 8U;
-            case Anisotropy::Filter16x:
-                return 16U;
-            }
-        }();
-        return static_cast<float>(std::max(1U << max_anisotropy, min_value));
-    }
+    std::array<float, 4> GetBorderColor() const noexcept;
+
+    float GetMaxAnisotropy() const noexcept;
 
     float GetMinLod() const {
         return static_cast<float>(min_lod_clamp) / 256.0f;
@@ -368,15 +352,6 @@ struct TSCEntry {
         constexpr u32 mask = 1U << (13 - 1);
         return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
     }
-
-    std::array<float, 4> GetBorderColor() const {
-        if (srgb_conversion) {
-            return {static_cast<float>(srgb_border_color_r) / 255.0f,
-                    static_cast<float>(srgb_border_color_g) / 255.0f,
-                    static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
-        }
-        return border_color;
-    }
 };
 static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");